import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import calinski_harabasz_score
from scipy.cluster.hierarchy import dendrogram
from scipy.cluster import hierarchy
import scipy.cluster.hierarchy as shc
from kneed import KneeLocator
from sklearn.metrics import silhouette_samples, silhouette_score
# Ignore harmless warnings
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
from statsmodels.stats.outliers_influence import variance_inflation_factor as ViF
from sklearn.neighbors import NearestCentroid
# Load the fruit-production workbook and key the rows by the "Provinsi" column.
Produksi_Buah_2020_2022 = (
    pd.read_excel("Produksi Tanaman Buah-buahan 2020-2022 gabungan.xlsx")
    .set_index("Provinsi")
)
# Preview the first rows of the table.
Produksi_Buah_2020_2022.head()
| Alpukat | Belimbing | Duku | Durian | Jambu Biji | Jambu Air | Jeruk Siam | Jeruk Besar | Mangga | Manggis | ... | Sawo | Markisa | Sirsak | Sukun | Melon | Semangka | Blewah | Apel | Anggur | Stroberi | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Provinsi | |||||||||||||||||||||
| ACEH | 150887 | 4309 | 22393 | 130595 | 23115 | 14949 | 46396 | 30986 | 79170 | 8516.0 | ... | 13592 | 1084.0 | 3505 | 5122 | 1084 | 21116.0 | NaN | 301.0 | NaN | 143.0 |
| SUMATERA UTARA | 117901 | 19347 | 58761 | 313678 | 40725 | 48230 | 1183180 | 5245 | 127630 | 71314.0 | ... | 62409 | 837.0 | 9001 | 6314 | 5126 | 112944.0 | 1.0 | NaN | 6.0 | 254.0 |
| SUMATERA BARAT | 266593 | 2950 | 20591 | 521775 | 28045 | 29095 | 381107 | 1684 | 49960 | 221084.0 | ... | 36591 | 36500.0 | 27086 | 9692 | 5611 | 51088.0 | 20.0 | 47.0 | 4.0 | 438.0 |
| RIAU | 4579 | 6706 | 21868 | 75321 | 33291 | 28619 | 254484 | 4780 | 53924 | 30975.0 | ... | 15694 | 6.0 | 4623 | 9492 | 3683 | 64243.0 | 58.0 | NaN | 59.0 | NaN |
| JAMBI | 33191 | 3579 | 50626 | 64455 | 11639 | 11071 | 168150 | 868 | 18281 | 30624.0 | ... | 11771 | 84.0 | 6031 | 8945 | 2709 | 16897.0 | 52.0 | NaN | 12.0 | 21.0 |
5 rows × 26 columns
# Descriptive statistics (count, mean, std, min, quartiles, max) per column.
Produksi_Buah_2020_2022.describe()
| Alpukat | Belimbing | Duku | Durian | Jambu Biji | Jambu Air | Jeruk Siam | Jeruk Besar | Mangga | Manggis | ... | Sawo | Markisa | Sirsak | Sukun | Melon | Semangka | Blewah | Apel | Anggur | Stroberi | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 34.000000 | 34.000000 | 34.000000 | 34.000000 | 34.000000 | 34.000000 | 3.400000e+01 | 34.000000 | 3.400000e+01 | 33.000000 | ... | 34.000000 | 29.000000 | 34.000000 | 34.000000 | 34.000000 | 33.000000 | 22.000000 | 1.000000e+01 | 26.000000 | 20.00000 |
| mean | 63061.588235 | 11194.205882 | 21918.029412 | 119659.000000 | 37983.705882 | 18438.058824 | 2.219542e+05 | 11039.441176 | 2.659684e+05 | 29394.424242 | ... | 15407.500000 | 1838.620690 | 12622.235294 | 15528.176471 | 11353.441176 | 40678.151515 | 1502.636364 | 1.549668e+05 | 1445.769231 | 2355.30000 |
| std | 117636.774955 | 35054.048100 | 27213.153865 | 194454.880051 | 77259.897458 | 31456.862337 | 4.903489e+05 | 26544.667640 | 7.499428e+05 | 52298.182488 | ... | 22026.930314 | 6887.045896 | 23906.914817 | 25926.683755 | 34873.462228 | 71321.659603 | 4394.608109 | 4.895717e+05 | 6492.170449 | 8387.45517 |
| min | 120.000000 | 47.000000 | 27.000000 | 650.000000 | 66.000000 | 16.000000 | 6.490000e+02 | 45.000000 | 5.918000e+03 | 9.000000 | ... | 1.000000 | 1.000000 | 102.000000 | 19.000000 | 10.000000 | 394.000000 | 1.000000 | 3.000000e+00 | 1.000000 | 1.00000 |
| 25% | 2824.250000 | 1142.750000 | 4376.750000 | 20012.500000 | 3964.500000 | 3117.250000 | 1.445975e+04 | 610.500000 | 1.842575e+04 | 2399.000000 | ... | 703.750000 | 8.000000 | 1256.250000 | 2347.000000 | 968.250000 | 4647.000000 | 10.250000 | 1.625000e+01 | 7.000000 | 23.75000 |
| 50% | 10819.000000 | 2211.500000 | 14785.000000 | 55420.500000 | 11281.500000 | 5934.500000 | 5.845900e+04 | 1388.500000 | 4.531900e+04 | 6371.000000 | ... | 5211.000000 | 70.000000 | 3878.000000 | 5535.000000 | 1903.500000 | 16897.000000 | 35.000000 | 5.750000e+01 | 27.500000 | 224.00000 |
| 75% | 56122.500000 | 4250.500000 | 22849.750000 | 113428.250000 | 31145.500000 | 18469.500000 | 1.712295e+05 | 4477.500000 | 9.367225e+04 | 28751.000000 | ... | 15168.500000 | 329.000000 | 8655.500000 | 9642.000000 | 4765.250000 | 47229.000000 | 168.750000 | 2.527500e+02 | 111.750000 | 452.50000 |
| max | 517574.000000 | 200209.000000 | 138448.000000 | 971027.000000 | 308606.000000 | 154531.000000 | 2.611658e+06 | 106417.000000 | 4.079350e+06 | 221084.000000 | ... | 78460.000000 | 36500.000000 | 98601.000000 | 116228.000000 | 188639.000000 | 368149.000000 | 19709.000000 | 1.548312e+06 | 33207.000000 | 37826.00000 |
8 rows × 26 columns
# Column dtypes and non-null counts, to see which columns have missing values.
Produksi_Buah_2020_2022.info()
<class 'pandas.core.frame.DataFrame'> Index: 34 entries, ACEH to PAPUA Data columns (total 26 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Alpukat 34 non-null int64 1 Belimbing 34 non-null int64 2 Duku 34 non-null int64 3 Durian 34 non-null int64 4 Jambu Biji 34 non-null int64 5 Jambu Air 34 non-null int64 6 Jeruk Siam 34 non-null int64 7 Jeruk Besar 34 non-null int64 8 Mangga 34 non-null int64 9 Manggis 33 non-null float64 10 Nangka 34 non-null int64 11 Nanas 33 non-null float64 12 Pepaya 34 non-null int64 13 Pisang 34 non-null int64 14 Rambutan 34 non-null int64 15 Salak 34 non-null int64 16 Sawo 34 non-null int64 17 Markisa 29 non-null float64 18 Sirsak 34 non-null int64 19 Sukun 34 non-null int64 20 Melon 34 non-null int64 21 Semangka 33 non-null float64 22 Blewah 22 non-null float64 23 Apel 10 non-null float64 24 Anggur 26 non-null float64 25 Stroberi 20 non-null float64 dtypes: float64(8), int64(18) memory usage: 7.2+ KB
# Horizontal box plot of every column of the raw table, to inspect spread and
# outliers before any cleaning.
sns.set(rc={"figure.figsize":(10, 6)})
box2020_2022 = sns.boxplot(data=Produksi_Buah_2020_2022, orient="h")
plt.xlabel("Berat Hasil Produksi Buah (dari jutaan ton)")
plt.ylabel("Jenis Hasil Produksi Buah")
# pyplot.show() renders the current figure; its only parameter is the keyword
# `block`, so the Axes object must not be passed to it.
plt.show()
# Drop the Markisa and Blewah columns, then replace every remaining missing
# value with 0.
# Original author's rationale: neither variable had any production across the
# 34 provinces in 2021, so both are left out of the analysis.
ProduksiBuah20_22 = (
    Produksi_Buah_2020_2022
    .drop(columns=['Markisa', 'Blewah'])
    .fillna(0)
)
ProduksiBuah20_22
| Alpukat | Belimbing | Duku | Durian | Jambu Biji | Jambu Air | Jeruk Siam | Jeruk Besar | Mangga | Manggis | ... | Rambutan | Salak | Sawo | Sirsak | Sukun | Melon | Semangka | Apel | Anggur | Stroberi | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Provinsi | |||||||||||||||||||||
| ACEH | 150887 | 4309 | 22393 | 130595 | 23115 | 14949 | 46396 | 30986 | 79170 | 8516.0 | ... | 100626 | 3034 | 13592 | 3505 | 5122 | 1084 | 21116.0 | 301.0 | 0.0 | 143.0 |
| SUMATERA UTARA | 117901 | 19347 | 58761 | 313678 | 40725 | 48230 | 1183180 | 5245 | 127630 | 71314.0 | ... | 79792 | 859993 | 62409 | 9001 | 6314 | 5126 | 112944.0 | 0.0 | 6.0 | 254.0 |
| SUMATERA BARAT | 266593 | 2950 | 20591 | 521775 | 28045 | 29095 | 381107 | 1684 | 49960 | 221084.0 | ... | 87406 | 6150 | 36591 | 27086 | 9692 | 5611 | 51088.0 | 47.0 | 4.0 | 438.0 |
| RIAU | 4579 | 6706 | 21868 | 75321 | 33291 | 28619 | 254484 | 4780 | 53924 | 30975.0 | ... | 89349 | 12387 | 15694 | 4623 | 9492 | 3683 | 64243.0 | 0.0 | 59.0 | 0.0 |
| JAMBI | 33191 | 3579 | 50626 | 64455 | 11639 | 11071 | 168150 | 868 | 18281 | 30624.0 | ... | 20499 | 17355 | 11771 | 6031 | 8945 | 2709 | 16897.0 | 0.0 | 12.0 | 21.0 |
| SUMATERA SELATAN | 105419 | 4068 | 138448 | 117258 | 18863 | 30010 | 172256 | 1184 | 52521 | 5916.0 | ... | 40836 | 6044 | 21356 | 4299 | 8519 | 5271 | 45418.0 | 3.0 | 12.0 | 194.0 |
| BENGKULU | 22152 | 1089 | 2473 | 55868 | 5075 | 3337 | 69186 | 1370 | 22511 | 15944.0 | ... | 18884 | 10062 | 5576 | 1413 | 1742 | 1368 | 2819.0 | 48.0 | 37.0 | 159.0 |
| LAMPUNG | 63732 | 10199 | 29695 | 62604 | 47611 | 31368 | 180335 | 3489 | 62078 | 28036.0 | ... | 59123 | 26553 | 44535 | 8682 | 20771 | 2120 | 62769.0 | 0.0 | 90.0 | 1.0 |
| KEP. BANGKA BELITUNG | 4090 | 393 | 2103 | 12793 | 1745 | 4813 | 5472 | 279 | 5918 | 2399.0 | ... | 5960 | 2998 | 1795 | 440 | 1558 | 1023 | 10017.0 | 0.0 | 0.0 | 0.0 |
| KEP. RIAU | 480 | 337 | 893 | 11568 | 1496 | 1037 | 698 | 69 | 6122 | 564.0 | ... | 5060 | 20172 | 2717 | 1094 | 3385 | 277 | 5164.0 | 0.0 | 0.0 | 0.0 |
| DKI JAKARTA | 3423 | 7676 | 34 | 650 | 4554 | 6361 | 649 | 45 | 20108 | 0.0 | ... | 11060 | 143 | 686 | 226 | 2506 | 10 | 0.0 | 0.0 | 14.0 | 0.0 |
| JAWA BARAT | 336037 | 25248 | 23002 | 240136 | 228644 | 78419 | 152813 | 23314 | 1337834 | 158602.0 | ... | 436610 | 80852 | 78460 | 42522 | 75440 | 2505 | 29679.0 | 0.0 | 168.0 | 37826.0 |
| JAWA TENGAH | 279669 | 57908 | 45767 | 479012 | 308606 | 154531 | 84477 | 55588 | 1445394 | 28751.0 | ... | 358116 | 1426748 | 63432 | 63961 | 116228 | 81932 | 203744.0 | 67.0 | 389.0 | 2898.0 |
| DI YOGYAKARTA | 24737 | 2274 | 8730 | 24878 | 15835 | 11786 | 7639 | 534 | 90355 | 8044.0 | ... | 41298 | 166790 | 8678 | 8576 | 38525 | 43781 | 20572.0 | 0.0 | 57.0 | 45.0 |
| JAWA TIMUR | 517574 | 200209 | 52445 | 971027 | 292169 | 83538 | 2611658 | 106417 | 4079350 | 156703.0 | ... | 405513 | 551285 | 58148 | 98601 | 73982 | 188639 | 368149.0 | 1548312.0 | 1843.0 | 2482.0 |
| BANTEN | 5866 | 4548 | 9878 | 90105 | 17618 | 19643 | 2411 | 515 | 94778 | 22488.0 | ... | 92262 | 3830 | 9077 | 11994 | 36096 | 975 | 4788.0 | 0.0 | 18.0 | 23.0 |
| BALI | 11263 | 1596 | 5465 | 74130 | 10924 | 6065 | 861555 | 5526 | 161993 | 59340.0 | ... | 58417 | 128845 | 10313 | 1472 | 1186 | 803 | 53426.0 | 6.0 | 33207.0 | 1155.0 |
| NUSA TENGGARA BARAT | 33294 | 1996 | 2471 | 102770 | 44368 | 7947 | 19847 | 3325 | 435121 | 62272.0 | ... | 55385 | 262 | 41150 | 84638 | 1761 | 7502 | 63944.0 | 772.0 | 1312.0 | 496.0 |
| NUSA TENGGARA TIMUR | 70890 | 2149 | 27 | 9897 | 32179 | 5539 | 160158 | 6106 | 229802 | 73.0 | ... | 18758 | 8604 | 3253 | 11768 | 10532 | 1035 | 6703.0 | 108.0 | 42.0 | 352.0 |
| KALIMANTAN BARAT | 4548 | 4075 | 20404 | 74767 | 13412 | 8950 | 324928 | 1978 | 23131 | 17111.0 | ... | 37101 | 5498 | 13112 | 5430 | 5948 | 1169 | 16216.0 | 0.0 | 30.0 | 0.0 |
| KALIMANTAN TENGAH | 1844 | 3274 | 4014 | 18952 | 10389 | 6626 | 21798 | 1088 | 6871 | 1280.0 | ... | 31475 | 2880 | 4450 | 2590 | 4011 | 2470 | 40153.0 | 0.0 | 25.0 | 312.0 |
| KALIMANTAN SELATAN | 120 | 2388 | 20608 | 54851 | 5685 | 2545 | 387016 | 1293 | 22399 | 3589.0 | ... | 45627 | 2664 | 4846 | 4094 | 6805 | 2720 | 47229.0 | 0.0 | 1.0 | 0.0 |
| KALIMANTAN TIMUR | 2460 | 3342 | 17592 | 32710 | 5834 | 4380 | 31573 | 1407 | 12403 | 938.0 | ... | 31241 | 34227 | 6819 | 3068 | 4925 | 1457 | 14669.0 | 0.0 | 0.0 | 0.0 |
| KALIMANTAN UTARA | 261 | 661 | 13196 | 23194 | 1119 | 1986 | 15872 | 285 | 20784 | 281.0 | ... | 24914 | 13257 | 332 | 1204 | 2053 | 1687 | 4647.0 | 0.0 | 2.0 | 0.0 |
| SULAWESI UTARA | 17925 | 1304 | 21625 | 38768 | 8585 | 3403 | 3266 | 1299 | 48286 | 6371.0 | ... | 28672 | 37950 | 2 | 5702 | 3070 | 2413 | 2420.0 | 0.0 | 2.0 | 0.0 |
| SULAWESI TENGAH | 17891 | 763 | 15185 | 116981 | 2195 | 3044 | 10056 | 666 | 37718 | 5294.0 | ... | 28801 | 2535 | 574 | 1100 | 1163 | 6594 | 19115.0 | 0.0 | 128.0 | 3.0 |
| SULAWESI SELATAN | 24000 | 1523 | 67959 | 167739 | 36423 | 5804 | 30905 | 106276 | 335685 | 9524.0 | ... | 108485 | 34876 | 2064 | 5400 | 40360 | 2309 | 25453.0 | 0.0 | 10.0 | 279.0 |
| SULAWESI TENGGARA | 3275 | 1731 | 11905 | 48369 | 5650 | 4700 | 92361 | 3570 | 47350 | 3667.0 | ... | 37403 | 5473 | 757 | 3662 | 8024 | 772 | 3626.0 | 0.0 | 119.0 | 24.0 |
| GORONTALO | 121 | 47 | 1026 | 14641 | 66 | 16 | 62034 | 335 | 8747 | 9.0 | ... | 4100 | 30 | 1 | 102 | 19 | 142 | 394.0 | 0.0 | 0.0 | 0.0 |
| SULAWESI BARAT | 1148 | 294 | 23170 | 54973 | 936 | 807 | 54884 | 592 | 43288 | 3835.0 | ... | 18296 | 224 | 182 | 533 | 3308 | 72 | 589.0 | 0.0 | 0.0 | 0.0 |
| MALUKU | 4442 | 1551 | 8460 | 36085 | 3768 | 3596 | 47175 | 2595 | 18860 | 3192.0 | ... | 3549 | 6886 | 60 | 1933 | 9094 | 966 | 3067.0 | 4.0 | 0.0 | 0.0 |
| MALUKU UTARA | 2674 | 725 | 9582 | 15432 | 1684 | 1959 | 7184 | 692 | 14102 | 2971.0 | ... | 20983 | 6297 | 24 | 1135 | 2294 | 329 | 2840.0 | 0.0 | 1.0 | 0.0 |
| PAPUA BARAT | 10375 | 1785 | 14385 | 10675 | 27576 | 1865 | 13989 | 1661 | 16284 | 42.0 | ... | 13179 | 7575 | 1285 | 2645 | 4132 | 796 | 2316.0 | 0.0 | 0.0 | 1.0 |
| PAPUA | 1233 | 559 | 432 | 1749 | 1622 | 855 | 80932 | 280 | 14168 | 267.0 | ... | 2261 | 324 | 114 | 626 | 956 | 6667 | 16165.0 | 0.0 | 2.0 | 0.0 |
34 rows × 24 columns
# Quick relational plot of the cleaned wide-form table (seaborn defaults).
sns.relplot(data=ProduksiBuah20_22)
<seaborn.axisgrid.FacetGrid at 0x192996d8a90>
# Heat map of the missing-value mask. NaNs were already replaced by 0 when
# ProduksiBuah20_22 was built, so this acts as a check that none remain.
sns.heatmap(ProduksiBuah20_22.isnull(),yticklabels=False,cbar=False,cmap='viridis')
<AxesSubplot:ylabel='Provinsi'>
# Heat map of the pairwise correlation matrix between the fruit columns.
sns.heatmap(ProduksiBuah20_22.corr())
<AxesSubplot:>
# Horizontal box plot of the cleaned table (Markisa/Blewah removed, NaN -> 0).
sns.set(rc={"figure.figsize":(10, 6)})
box20_22 = sns.boxplot(data=ProduksiBuah20_22, orient="h")
plt.xlabel("Berat Hasil Produksi Buah (dari jutaan ton)")
plt.ylabel("Jenis Hasil Produksi Buah")
# pyplot.show() renders the current figure; its only parameter is the keyword
# `block`, so the Axes object must not be passed to it.
plt.show()
# Keep an untouched copy of the cleaned table, plus its values as a NumPy array.
ProduksiBuah20_22_copy = ProduksiBuah20_22.copy()
pb = ProduksiBuah20_22.to_numpy()

# Rescale every column with MinMaxScaler (default range [0, 1]) and wrap the
# result back into a DataFrame carrying the original row and column labels.
# StandardScaler is the alternative the author had left disabled.
scaler = MinMaxScaler()
ProduksiBuah20_22_scaled = pd.DataFrame(
    scaler.fit_transform(pb),
    index=ProduksiBuah20_22.index,
    columns=ProduksiBuah20_22.columns,
)
print("Scaled Dataset Using MinMaxScaler")
ProduksiBuah20_22_scaled
Scaled Dataset Using MinMaxScaler
| Alpukat | Belimbing | Duku | Durian | Jambu Biji | Jambu Air | Jeruk Siam | Jeruk Besar | Mangga | Manggis | ... | Rambutan | Salak | Sawo | Sirsak | Sukun | Melon | Semangka | Apel | Anggur | Stroberi | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Provinsi | |||||||||||||||||||||
| ACEH | 0.291363 | 0.021293 | 0.161580 | 0.133912 | 0.074703 | 0.096644 | 0.017521 | 0.290875 | 0.017983 | 0.038519 | ... | 0.226465 | 0.002106 | 0.173224 | 0.034549 | 0.043912 | 0.005694 | 0.057357 | 0.000194 | 0.000000 | 0.003780 |
| SUMATERA UTARA | 0.227616 | 0.096422 | 0.424314 | 0.322584 | 0.131779 | 0.312034 | 0.452902 | 0.048885 | 0.029879 | 0.322565 | ... | 0.178499 | 0.602756 | 0.795422 | 0.090346 | 0.054170 | 0.027122 | 0.306789 | 0.000000 | 0.000181 | 0.006715 |
| SUMATERA BARAT | 0.514969 | 0.014503 | 0.148561 | 0.537034 | 0.090682 | 0.188195 | 0.145713 | 0.015408 | 0.010812 | 1.000000 | ... | 0.196029 | 0.004290 | 0.466358 | 0.273952 | 0.083238 | 0.029693 | 0.138770 | 0.000030 | 0.000120 | 0.011579 |
| RIAU | 0.008617 | 0.033268 | 0.157787 | 0.076951 | 0.107685 | 0.185115 | 0.097217 | 0.044514 | 0.011785 | 0.140105 | ... | 0.200502 | 0.008661 | 0.200015 | 0.045899 | 0.081517 | 0.019472 | 0.174503 | 0.000000 | 0.001777 | 0.000000 |
| JAMBI | 0.063911 | 0.017646 | 0.365544 | 0.065753 | 0.037509 | 0.071546 | 0.064152 | 0.007737 | 0.003035 | 0.138517 | ... | 0.041989 | 0.012143 | 0.150015 | 0.060194 | 0.076810 | 0.014309 | 0.045897 | 0.000000 | 0.000361 | 0.000555 |
| SUMATERA SELATAN | 0.203494 | 0.020089 | 1.000000 | 0.120168 | 0.060922 | 0.194117 | 0.065724 | 0.010708 | 0.011441 | 0.026759 | ... | 0.088811 | 0.004215 | 0.272180 | 0.042610 | 0.073144 | 0.027891 | 0.123369 | 0.000002 | 0.000361 | 0.005129 |
| BENGKULU | 0.042578 | 0.005206 | 0.017671 | 0.056904 | 0.016235 | 0.021493 | 0.026249 | 0.012456 | 0.004073 | 0.072117 | ... | 0.038271 | 0.007032 | 0.071056 | 0.013310 | 0.014827 | 0.007199 | 0.007657 | 0.000031 | 0.001114 | 0.004203 |
| LAMPUNG | 0.122933 | 0.050719 | 0.214332 | 0.063845 | 0.154097 | 0.202906 | 0.068819 | 0.032377 | 0.013787 | 0.126812 | ... | 0.130913 | 0.018590 | 0.567609 | 0.087107 | 0.178575 | 0.011186 | 0.170499 | 0.000000 | 0.002710 | 0.000026 |
| KEP. BANGKA BELITUNG | 0.007672 | 0.001729 | 0.014998 | 0.012514 | 0.005442 | 0.031046 | 0.001847 | 0.002200 | 0.000000 | 0.010851 | ... | 0.008516 | 0.002080 | 0.022865 | 0.003432 | 0.013243 | 0.005370 | 0.027209 | 0.000000 | 0.000000 | 0.000000 |
| KEP. RIAU | 0.000696 | 0.001449 | 0.006256 | 0.011251 | 0.004635 | 0.006608 | 0.000019 | 0.000226 | 0.000050 | 0.002551 | ... | 0.006444 | 0.014118 | 0.034617 | 0.010071 | 0.028965 | 0.001415 | 0.014027 | 0.000000 | 0.000000 | 0.000000 |
| DKI JAKARTA | 0.006383 | 0.038114 | 0.000051 | 0.000000 | 0.014546 | 0.041064 | 0.000000 | 0.000000 | 0.003484 | 0.000000 | ... | 0.020258 | 0.000079 | 0.008731 | 0.001259 | 0.021401 | 0.000000 | 0.000000 | 0.000000 | 0.000422 | 0.000000 |
| JAWA BARAT | 0.649173 | 0.125903 | 0.165979 | 0.246797 | 0.740837 | 0.507414 | 0.058278 | 0.218751 | 0.326976 | 0.717383 | ... | 1.000000 | 0.056649 | 1.000000 | 0.430664 | 0.649012 | 0.013227 | 0.080617 | 0.000000 | 0.005059 | 1.000000 |
| JAWA TENGAH | 0.540239 | 0.289071 | 0.330441 | 0.492965 | 1.000000 | 1.000000 | 0.032106 | 0.522158 | 0.353382 | 0.130046 | ... | 0.819284 | 1.000000 | 0.808460 | 0.648321 | 1.000000 | 0.434302 | 0.553428 | 0.000043 | 0.011714 | 0.076614 |
| DI YOGYAKARTA | 0.047573 | 0.011126 | 0.062873 | 0.024968 | 0.051108 | 0.076174 | 0.002677 | 0.004597 | 0.020729 | 0.036384 | ... | 0.089875 | 0.116884 | 0.110593 | 0.086031 | 0.331351 | 0.232048 | 0.055880 | 0.000000 | 0.001717 | 0.001190 |
| JAWA TIMUR | 1.000000 | 1.000000 | 0.378685 | 1.000000 | 0.946727 | 0.540543 | 1.000000 | 1.000000 | 1.000000 | 0.708794 | ... | 0.928405 | 0.386380 | 0.741113 | 1.000000 | 0.636465 | 1.000000 | 1.000000 | 1.000000 | 0.055500 | 0.065616 |
| BANTEN | 0.011104 | 0.022487 | 0.071167 | 0.092186 | 0.056887 | 0.127023 | 0.000675 | 0.004418 | 0.021815 | 0.101717 | ... | 0.207209 | 0.002663 | 0.115678 | 0.120732 | 0.310449 | 0.005116 | 0.013006 | 0.000000 | 0.000542 | 0.000608 |
| BALI | 0.021534 | 0.007739 | 0.039286 | 0.075723 | 0.035192 | 0.039148 | 0.329722 | 0.051527 | 0.038315 | 0.268405 | ... | 0.129288 | 0.090288 | 0.131432 | 0.013909 | 0.010042 | 0.004204 | 0.145121 | 0.000004 | 1.000000 | 0.030535 |
| NUSA TENGGARA BARAT | 0.064110 | 0.009737 | 0.017656 | 0.105237 | 0.143586 | 0.051328 | 0.007353 | 0.030835 | 0.105366 | 0.281667 | ... | 0.122307 | 0.000163 | 0.524465 | 0.858242 | 0.014990 | 0.039718 | 0.173691 | 0.000499 | 0.039510 | 0.013113 |
| NUSA TENGGARA TIMUR | 0.136766 | 0.010501 | 0.000000 | 0.009529 | 0.104081 | 0.035744 | 0.061091 | 0.056979 | 0.054962 | 0.000330 | ... | 0.037981 | 0.006010 | 0.041448 | 0.118438 | 0.090466 | 0.005434 | 0.018207 | 0.000070 | 0.001265 | 0.009306 |
| KALIMANTAN BARAT | 0.008557 | 0.020124 | 0.147210 | 0.076380 | 0.043255 | 0.057820 | 0.124197 | 0.018172 | 0.004226 | 0.077396 | ... | 0.080212 | 0.003833 | 0.167106 | 0.054092 | 0.051020 | 0.006144 | 0.044047 | 0.000000 | 0.000903 | 0.000000 |
| KALIMANTAN TENGAH | 0.003332 | 0.016122 | 0.028803 | 0.018861 | 0.033458 | 0.042779 | 0.008100 | 0.009805 | 0.000234 | 0.005790 | ... | 0.067259 | 0.001998 | 0.056705 | 0.025259 | 0.034352 | 0.013041 | 0.109067 | 0.000000 | 0.000753 | 0.008248 |
| KALIMANTAN SELATAN | 0.000000 | 0.011696 | 0.148684 | 0.055856 | 0.018212 | 0.016367 | 0.147976 | 0.011732 | 0.004046 | 0.016234 | ... | 0.099841 | 0.001846 | 0.061752 | 0.040528 | 0.058395 | 0.014367 | 0.128288 | 0.000000 | 0.000030 | 0.000000 |
| KALIMANTAN TIMUR | 0.004522 | 0.016462 | 0.126895 | 0.033039 | 0.018694 | 0.028243 | 0.011844 | 0.012804 | 0.001592 | 0.004243 | ... | 0.066721 | 0.023969 | 0.086899 | 0.030112 | 0.042217 | 0.007671 | 0.039845 | 0.000000 | 0.000000 | 0.000000 |
| KALIMANTAN UTARA | 0.000272 | 0.003068 | 0.095137 | 0.023232 | 0.003413 | 0.012750 | 0.005830 | 0.002256 | 0.003650 | 0.001271 | ... | 0.052154 | 0.009271 | 0.004219 | 0.011188 | 0.017503 | 0.008890 | 0.012623 | 0.000000 | 0.000060 | 0.000000 |
| SULAWESI UTARA | 0.034409 | 0.006280 | 0.156031 | 0.039282 | 0.027611 | 0.021920 | 0.001002 | 0.011789 | 0.010401 | 0.028817 | ... | 0.060806 | 0.026578 | 0.000013 | 0.056853 | 0.026254 | 0.012739 | 0.006573 | 0.000000 | 0.000060 | 0.000000 |
| SULAWESI TENGAH | 0.034343 | 0.003577 | 0.109507 | 0.119882 | 0.006900 | 0.019597 | 0.003603 | 0.005838 | 0.007807 | 0.023946 | ... | 0.061103 | 0.001756 | 0.007303 | 0.010132 | 0.009844 | 0.034904 | 0.051922 | 0.000000 | 0.003855 | 0.000079 |
| SULAWESI SELATAN | 0.046149 | 0.007374 | 0.490764 | 0.172190 | 0.117836 | 0.037459 | 0.011588 | 0.998674 | 0.080956 | 0.043079 | ... | 0.244559 | 0.024424 | 0.026294 | 0.053787 | 0.347142 | 0.012188 | 0.069138 | 0.000000 | 0.000301 | 0.007376 |
| SULAWESI TENGGARA | 0.006097 | 0.008413 | 0.085811 | 0.049176 | 0.018098 | 0.030314 | 0.035125 | 0.033138 | 0.010171 | 0.016586 | ... | 0.080907 | 0.003815 | 0.009636 | 0.036142 | 0.068885 | 0.004040 | 0.009849 | 0.000000 | 0.003584 | 0.000634 |
| GORONTALO | 0.000002 | 0.000000 | 0.007217 | 0.014418 | 0.000000 | 0.000000 | 0.023510 | 0.002726 | 0.000695 | 0.000041 | ... | 0.004234 | 0.000000 | 0.000000 | 0.000000 | 0.000000 | 0.000700 | 0.001070 | 0.000000 | 0.000000 | 0.000000 |
| SULAWESI BARAT | 0.001987 | 0.001234 | 0.167193 | 0.055981 | 0.002820 | 0.005119 | 0.020772 | 0.005142 | 0.009174 | 0.017346 | ... | 0.036917 | 0.000136 | 0.002307 | 0.004376 | 0.028302 | 0.000329 | 0.001600 | 0.000000 | 0.000000 | 0.000000 |
| MALUKU | 0.008352 | 0.007514 | 0.060923 | 0.036517 | 0.011998 | 0.023169 | 0.017819 | 0.023972 | 0.003177 | 0.014438 | ... | 0.002965 | 0.004805 | 0.000752 | 0.018589 | 0.078092 | 0.005068 | 0.008331 | 0.000003 | 0.000000 | 0.000000 |
| MALUKU UTARA | 0.004936 | 0.003387 | 0.069029 | 0.015233 | 0.005244 | 0.012575 | 0.002503 | 0.006082 | 0.002009 | 0.013438 | ... | 0.043104 | 0.004393 | 0.000293 | 0.010487 | 0.019577 | 0.001691 | 0.007714 | 0.000000 | 0.000030 | 0.000000 |
| PAPUA BARAT | 0.019818 | 0.008683 | 0.103727 | 0.010331 | 0.089162 | 0.011966 | 0.005109 | 0.015192 | 0.002545 | 0.000190 | ... | 0.025136 | 0.005288 | 0.016365 | 0.025818 | 0.035393 | 0.004167 | 0.006291 | 0.000000 | 0.000000 | 0.000026 |
| PAPUA | 0.002151 | 0.002558 | 0.002926 | 0.001133 | 0.005043 | 0.005430 | 0.030748 | 0.002209 | 0.002025 | 0.001208 | ... | 0.000000 | 0.000206 | 0.001440 | 0.005320 | 0.008063 | 0.035291 | 0.043909 | 0.000000 | 0.000060 | 0.000000 |
34 rows × 24 columns
# Same relational plot as before, now on the min-max scaled table.
sns.relplot(data=ProduksiBuah20_22_scaled)
<seaborn.axisgrid.FacetGrid at 0x192991f66d0>
# Horizontal box plot of the min-max scaled table.
# NOTE(review): the x label still describes a weight, but the plotted values
# are the scaled ones — confirm whether the label should change.
sns.set(rc={"figure.figsize":(10, 6)})
box20_22 = sns.boxplot(data=ProduksiBuah20_22_scaled, orient="h")
plt.xlabel("Berat Hasil Produksi Buah (dari jutaan ton)")
plt.ylabel("Jenis Hasil Produksi Buah")
# pyplot.show() renders the current figure; its only parameter is the keyword
# `block`, so the Axes object must not be passed to it.
plt.show()
def Calc_VIF(Data, add_constant=False):
    """Compute the variance inflation factor (VIF) of every column in ``Data``.

    Args:
        Data: DataFrame whose columns are the explanatory variables.
        add_constant: When True, a column of ones is appended to the design
            matrix before the VIFs are computed, so each auxiliary regression
            includes an intercept. The default (False) passes ``Data`` as-is,
            which is the original behaviour. statsmodels'
            ``variance_inflation_factor`` does not add an intercept itself, so
            VIFs computed without one can look inflated for columns whose
            mean is far from zero.

    Returns:
        DataFrame with one row per column of ``Data``: ``Variables`` holds the
        column name and ``VIF`` the corresponding factor.
    """
    design = Data.values
    if add_constant:
        # The intercept goes last so column i of `design` is still column i
        # of `Data`; no VIF is reported for the intercept itself.
        design = np.column_stack([design, np.ones(len(Data))])

    VIF = pd.DataFrame()
    VIF['Variables'] = Data.columns
    VIF['VIF'] = [ViF(design, i) for i in range(Data.shape[1])]
    return VIF
# VIF of every column of the scaled table.
Calc_VIF(ProduksiBuah20_22_scaled)
| Variables | VIF | |
|---|---|---|
| 0 | Alpukat | 231.974512 |
| 1 | Belimbing | 1400.773457 |
| 2 | Duku | 13.171865 |
| 3 | Durian | 257.046865 |
| 4 | Jambu Biji | 359.296810 |
| 5 | Jambu Air | 1813.794145 |
| 6 | Jeruk Siam | 150.844569 |
| 7 | Jeruk Besar | 111.155240 |
| 8 | Mangga | 4794.463991 |
| 9 | Manggis | 106.549009 |
| 10 | Nangka | 186.399033 |
| 11 | Nanas | 155.402171 |
| 12 | Pepaya | 56.890755 |
| 13 | Pisang | 411.241944 |
| 14 | Rambutan | 309.253186 |
| 15 | Salak | 219.163682 |
| 16 | Sawo | 566.568867 |
| 17 | Sirsak | 442.691592 |
| 18 | Sukun | 342.950252 |
| 19 | Melon | 334.925102 |
| 20 | Semangka | 205.517478 |
| 21 | Apel | 2721.030432 |
| 22 | Anggur | 13.983284 |
| 23 | Stroberi | 428.268248 |
# Work on a copy of the scaled table; `psc` holds the same values as a NumPy array.
ProduksiBuah20_22_scaled_cluster = ProduksiBuah20_22_scaled.copy()
psc = ProduksiBuah20_22_scaled.to_numpy()
# Project the scaled features onto their first three principal components.
pca = PCA(n_components=3)
principalComponents = pca.fit_transform(ProduksiBuah20_22_scaled_cluster)
# Share of the total variance captured by each retained component.
print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))
Explained variation per principal component: [0.65229876 0.086718 0.06117068]
# Component scores as a DataFrame: one row per province, one column per
# retained principal component.
principalDf = pd.DataFrame(
    principalComponents,
    index=ProduksiBuah20_22_scaled.index,
    columns=[
        'principal component 1',
        'principal component 2',
        'principal component 3',
    ],
)
principalDf
| principal component 1 | principal component 2 | principal component 3 | |
|---|---|---|---|
| Provinsi | |||
| ACEH | -0.092806 | 0.047337 | 0.024086 |
| SUMATERA UTARA | 0.472903 | -0.261143 | -0.021415 |
| SUMATERA BARAT | 0.384959 | -0.152729 | -0.776127 |
| RIAU | -0.083750 | -0.117560 | 0.058292 |
| JAMBI | -0.239929 | -0.039618 | 0.026891 |
| SUMATERA SELATAN | 0.007483 | -0.239554 | 0.309540 |
| BENGKULU | -0.411179 | 0.058462 | -0.062127 |
| LAMPUNG | 0.349867 | -0.437603 | 0.146488 |
| KEP. BANGKA BELITUNG | -0.482492 | 0.072243 | -0.000884 |
| KEP. RIAU | -0.487729 | 0.066953 | 0.001912 |
| DKI JAKARTA | -0.489489 | 0.075747 | 0.004065 |
| JAWA BARAT | 1.596476 | -1.023694 | -0.552769 |
| JAWA TENGAH | 2.174177 | -0.537051 | 0.835334 |
| DI YOGYAKARTA | -0.193676 | 0.018822 | 0.118851 |
| JAWA TIMUR | 3.453580 | 1.080844 | -0.173608 |
| BANTEN | -0.182623 | -0.058624 | 0.004994 |
| BALI | -0.156027 | 0.187157 | -0.351779 |
| NUSA TENGGARA BARAT | 0.220100 | -0.113325 | -0.286556 |
| NUSA TENGGARA TIMUR | -0.192434 | 0.107810 | -0.000499 |
| KALIMANTAN BARAT | -0.261015 | -0.015714 | -0.004639 |
| KALIMANTAN TENGAH | -0.390270 | 0.060895 | 0.019443 |
| KALIMANTAN SELATAN | -0.319736 | 0.101512 | 0.022756 |
| KALIMANTAN TIMUR | -0.355860 | 0.032815 | 0.048697 |
| KALIMANTAN UTARA | -0.427224 | 0.083359 | 0.032041 |
| SULAWESI UTARA | -0.397065 | 0.071251 | 0.027039 |
| SULAWESI TENGAH | -0.409109 | 0.090917 | 0.005913 |
| SULAWESI SELATAN | 0.106292 | 0.246762 | 0.437292 |
| SULAWESI TENGGARA | -0.393085 | 0.076199 | 0.025919 |
| GORONTALO | -0.513739 | 0.096608 | -0.010166 |
| SULAWESI BARAT | -0.450433 | 0.080170 | 0.024656 |
| MALUKU | -0.443985 | 0.086514 | 0.024672 |
| MALUKU UTARA | -0.479259 | 0.074951 | 0.009942 |
| PAPUA BARAT | -0.420570 | 0.067151 | 0.035231 |
| PAPUA | -0.492354 | 0.112136 | -0.003484 |
# Scatter of the observations on the first two principal components.
plt.figure(figsize=(15,7))
# No `c` values are supplied, so a colormap would have nothing to map and
# matplotlib would ignore it; the `cmap` argument is therefore omitted.
plt.scatter(principalComponents[:,0], principalComponents[:,1], s=50)
plt.xlabel('First principal component')
plt.ylabel('Second Principal Component')
Text(0, 0.5, 'Second Principal Component')
# Repeat the per-component explained-variance ratios for reference.
print('Explained variation per principal component: {}'.format(pca.explained_variance_ratio_))
Explained variation per principal component: [0.65229876 0.086718 0.06117068]
# Running total of the explained-variance ratios across the retained components.
evc = np.cumsum(pca.explained_variance_ratio_)
evc
array([0.65229876, 0.73901675, 0.80018744])
# Cumulative explained variance against the number of retained components.
plt.ylabel('% Variance Explained')
plt.xlabel('# of Features')
plt.title('PCA Analysis')
# A bare plt.style.context(...) call has no effect outside a `with` block,
# so it is not used here.
#plt.axhline(y = 0.8, color = 'r', linestyle = '-')
# Plot against 1..len(evc) so the x axis reads as a component count rather
# than a zero-based array index.
plt.plot(range(1, len(evc) + 1), evc)
[<matplotlib.lines.Line2D at 0x1929a4d3550>]
# Singular values associated with the retained components.
print(pca.singular_values_)
[4.82349523 1.75870583 1.47710115]
# Component weights (pca.components_ transposed): one row per input feature,
# one column per principal component. Row labels are hand-written and must
# stay in the same order as the columns of the table PCA was fitted on.
# NOTE(review): 'Nenas (Ton)' labels the column named 'Nanas' in the data —
# confirm the intended spelling.
loadings = pd.DataFrame(
    pca.components_.T,
    columns=['PC1', 'PC2', 'PC3'],
    index=[
        'Alpukat (Ton)',
        'Belimbing (Ton)',
        'Duku (Ton)',
        'Durian (Ton)',
        'Jambu Biji (Ton)',
        'Jambu Air (Ton)',
        'Jeruk Siam (Ton)',
        'Jeruk Besar (Ton)',
        'Mangga (Ton)',
        'Manggis (Ton)',
        'Nangka (Ton)',
        'Nenas (Ton)',
        'Pepaya (Ton)',
        'Pisang (Ton)',
        'Rambutan (Ton)',
        'Salak (Ton)',
        'Sawo (Ton)',
        'Sirsak (Ton)',
        'Sukun (Ton)',
        'Melon (Ton)',
        'Semangka (Ton)',
        'Apel (Ton)',
        'Anggur (Ton)',
        'Stroberi (Ton)',
    ],
)
loadings
| PC1 | PC2 | PC3 | |
|---|---|---|---|
| Alpukat (Ton) | 0.252451 | -0.022119 | -0.181361 |
| Belimbing (Ton) | 0.183705 | 0.242558 | 0.001300 |
| Duku (Ton) | 0.086611 | -0.070551 | 0.270380 |
| Durian (Ton) | 0.216049 | 0.135159 | -0.109172 |
| Jambu Biji (Ton) | 0.284728 | -0.118264 | 0.114043 |
| Jambu Air (Ton) | 0.211105 | -0.228374 | 0.190181 |
| Jeruk Siam (Ton) | 0.157324 | 0.292070 | -0.170702 |
| Jeruk Besar (Ton) | 0.213581 | 0.266015 | 0.256476 |
| Mangga (Ton) | 0.203846 | 0.181947 | -0.030309 |
| Manggis (Ton) | 0.184538 | -0.102790 | -0.599328 |
| Nangka (Ton) | 0.256665 | 0.015693 | 0.125663 |
| Nenas (Ton) | 0.125161 | -0.270899 | 0.192270 |
| Pepaya (Ton) | 0.221634 | 0.077304 | -0.010064 |
| Pisang (Ton) | 0.222451 | 0.029697 | -0.068860 |
| Rambutan (Ton) | 0.276040 | -0.167025 | -0.032217 |
| Salak (Ton) | 0.163654 | -0.101400 | 0.330312 |
| Sawo (Ton) | 0.277351 | -0.412929 | -0.167949 |
| Sirsak (Ton) | 0.245180 | 0.038517 | -0.120927 |
| Sukun (Ton) | 0.222552 | -0.169971 | 0.253015 |
| Melon (Ton) | 0.186388 | 0.266910 | 0.087919 |
| Semangka (Ton) | 0.207232 | 0.174681 | 0.068631 |
| Apel (Ton) | 0.148446 | 0.349422 | -0.079629 |
| Anggur (Ton) | 0.003172 | 0.074561 | -0.167209 |
| Stroberi (Ton) | 0.085463 | -0.320293 | -0.237788 |
# Rank the features by their PC1 weight (largest first) and draw them as bars.
# NOTE(review): the column is named 'CorrelationWithPC1', but the values are
# the raw entries of pca.components_ — confirm the naming is intended.
pc1_loadings = (
    loadings
    .sort_values(by='PC1', ascending=False)[['PC1']]
    .reset_index()
)
pc1_loadings.columns = ['Attribute', 'CorrelationWithPC1']

plt.bar(
    x=pc1_loadings['Attribute'],
    height=pc1_loadings['CorrelationWithPC1'],
    color='#087E8B',
)
plt.title('PCA loading scores (first principal component)', size=20)
plt.xticks(rotation='vertical')
plt.show()
# Within-cluster sum of squares (inertia) for k = 1..10 on the principal
# component scores; this is the input of the elbow analysis.
wcss = []
for i in range(1, 11):
    # Fixed seed and n_init, matching the other KMeans models in this file,
    # so the curve is reproducible from run to run.
    kmeans_pca = KMeans(n_clusters=i, n_init=10, random_state=42)
    kmeans_pca.fit(principalComponents)
    wcss.append(kmeans_pca.inertia_)
wcss
[28.54098019819429, 9.372392587904084, 5.3572564298669425, 3.2111782243268263, 1.962483422058978, 1.3226630825115648, 0.9173946451958412, 0.5985585401544481, 0.45682420658711, 0.3127598148093504]
# Locate the knee of the inertia curve: y is the WCSS list, x the matching
# cluster counts starting at 1.
y=wcss
x=range(1, len(y)+1)
# The curve is declared convex and decreasing for KneeLocator.
kn = KneeLocator(x, y, curve='convex', direction='decreasing')
print(kn.knee)
3
# Elbow curve with a dashed vertical marker at the knee found by KneeLocator,
# spanning the current y-axis limits.
plt.plot(x, y, 'bx-')
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('Within-cluster Sum of Squares')
plt.vlines(kn.knee, *plt.ylim(), linestyles='dashed')
<matplotlib.collections.LineCollection at 0x1929a61bfd0>
# k-means with 3 clusters and k-means++ initialisation on the principal
# component scores.
kmeans3_i = KMeans(n_clusters=3, init='k-means++', n_init=10, max_iter=300, random_state=42)
# fit_predict() fits the model and returns the labels in one step, so a
# separate fit() beforehand would only repeat the same work.
identified_clusters_Kmeans3_i = kmeans3_i.fit_predict(principalDf)
identified_clusters_Kmeans3_i
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# Feature names the model recorded when it was fitted on the DataFrame.
kmeans3_i.feature_names_in_
array(['principal component 1', 'principal component 2',
'principal component 3'], dtype=object)
# k-means with 3 clusters and random initialisation on the principal
# component scores.
kmeans3_r = KMeans(n_clusters=3, init='random', n_init=10, max_iter=300, random_state=42)
# fit_predict() fits the model and returns the labels in one step, so a
# separate fit() beforehand would only repeat the same work.
identified_clusters_Kmeans3_r = kmeans3_r.fit_predict(principalDf)
identified_clusters_Kmeans3_r
array([2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 1, 1, 2, 1, 2, 2, 0, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
# k-means with 4 clusters and k-means++ initialisation on the principal
# component scores.
kmeans4 = KMeans(n_clusters=4, init='k-means++', n_init=10, max_iter=300, random_state=42)
# fit_predict() fits the model and returns the labels in one step, so a
# separate fit() beforehand would only repeat the same work.
identified_clusters_Kmeans4 = kmeans4.fit_predict(principalDf)
identified_clusters_Kmeans4
array([0, 3, 3, 0, 0, 0, 0, 3, 0, 0, 0, 1, 1, 0, 2, 0, 0, 3, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# k-means with 4 clusters and random initialisation on the principal
# component scores.
kmeans4_r = KMeans(n_clusters=4, init='random', n_init=10, max_iter=300, random_state=42)
# fit_predict() fits the model and returns the labels in one step, so a
# separate fit() beforehand would only repeat the same work.
identified_clusters_Kmeans4_r = kmeans4_r.fit_predict(principalDf)
identified_clusters_Kmeans4_r
array([2, 1, 1, 2, 2, 2, 0, 1, 0, 0, 0, 1, 3, 2, 3, 2, 2, 1, 2, 2, 0, 0,
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0])
# k-means with 5 clusters and k-means++ initialisation on the principal
# component scores.
kmeans5 = KMeans(n_clusters=5, init='k-means++', n_init=10, max_iter=300, random_state=42)
# fit_predict() fits the model and returns the labels in one step, so a
# separate fit() beforehand would only repeat the same work.
identified_clusters_Kmeans5 = kmeans5.fit_predict(principalDf)
identified_clusters_Kmeans5
array([2, 0, 0, 2, 2, 2, 2, 0, 2, 2, 2, 4, 1, 2, 3, 2, 2, 0, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
# k-means with 5 clusters and random initialisation on the principal
# component scores.
kmeans5_r = KMeans(n_clusters=5, init='random', n_init=10, max_iter=300, random_state=42)
# fit_predict() fits the model and returns the labels in one step, so a
# separate fit() beforehand would only repeat the same work.
identified_clusters_Kmeans5_r = kmeans5_r.fit_predict(principalDf)
identified_clusters_Kmeans5_r
array([2, 1, 1, 2, 2, 2, 0, 1, 0, 0, 0, 4, 4, 2, 3, 2, 2, 1, 2, 2, 0, 0,
0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0])
# Scatter on the first two principal components, coloured by the k=3
# (k-means++) labels, with the cluster centres overlaid in translucent black.
centers = kmeans3_i.cluster_centers_
plt.scatter(
    principalComponents[:, 0],
    principalComponents[:, 1],
    c=identified_clusters_Kmeans3_i,
    s=100,
    cmap='rainbow',
)
plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
plt.title("Optimal Number of Clusters")
plt.xlabel('First principal component')
plt.ylabel('Second Principal Component')
plt.show()
plt.scatter(principalComponents[:, 0], principalComponents[:, 1], c= identified_clusters_Kmeans3_r, s=100, cmap='rainbow')
plt.title("Optimal Number of Clusters")
centers = kmeans3_r.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='black', s=200, alpha=0.5)
plt.xlabel('First principal component')
plt.ylabel('Second Principal Component')
plt.show()
# One column of K-means labels per (k, init) combination, indexed like
# ProduksiBuah20_22_scaled_cluster. Column order matters: k-means++ first,
# then random.
Kmeans_Tabel = pd.DataFrame(
    {
        'k=3 kmeans++': identified_clusters_Kmeans3_i,
        'k=4 kmeans++': identified_clusters_Kmeans4,
        'k=5 kmeans++': identified_clusters_Kmeans5,
        'k=3 random': identified_clusters_Kmeans3_r,
        'k=4 random': identified_clusters_Kmeans4_r,
        'k=5 random': identified_clusters_Kmeans5_r,
    },
    index=ProduksiBuah20_22_scaled_cluster.index,
)
Kmeans_Tabel
| k=3 kmeans++ | k=4 kmeans++ | k=5 kmeans++ | k=3 random | k=4 random | k=5 random | |
|---|---|---|---|---|---|---|
| Provinsi | ||||||
| ACEH | 0 | 0 | 2 | 2 | 2 | 2 |
| SUMATERA UTARA | 0 | 3 | 0 | 0 | 1 | 1 |
| SUMATERA BARAT | 0 | 3 | 0 | 0 | 1 | 1 |
| RIAU | 0 | 0 | 2 | 2 | 2 | 2 |
| JAMBI | 0 | 0 | 2 | 2 | 2 | 2 |
| SUMATERA SELATAN | 0 | 0 | 2 | 2 | 2 | 2 |
| BENGKULU | 0 | 0 | 2 | 2 | 0 | 0 |
| LAMPUNG | 0 | 3 | 0 | 0 | 1 | 1 |
| KEP. BANGKA BELITUNG | 0 | 0 | 2 | 2 | 0 | 0 |
| KEP. RIAU | 0 | 0 | 2 | 2 | 0 | 0 |
| DKI JAKARTA | 0 | 0 | 2 | 2 | 0 | 0 |
| JAWA BARAT | 1 | 1 | 4 | 1 | 1 | 4 |
| JAWA TENGAH | 1 | 1 | 1 | 1 | 3 | 4 |
| DI YOGYAKARTA | 0 | 0 | 2 | 2 | 2 | 2 |
| JAWA TIMUR | 2 | 2 | 3 | 1 | 3 | 3 |
| BANTEN | 0 | 0 | 2 | 2 | 2 | 2 |
| BALI | 0 | 0 | 2 | 2 | 2 | 2 |
| NUSA TENGGARA BARAT | 0 | 3 | 0 | 0 | 1 | 1 |
| NUSA TENGGARA TIMUR | 0 | 0 | 2 | 2 | 2 | 2 |
| KALIMANTAN BARAT | 0 | 0 | 2 | 2 | 2 | 2 |
| KALIMANTAN TENGAH | 0 | 0 | 2 | 2 | 0 | 0 |
| KALIMANTAN SELATAN | 0 | 0 | 2 | 2 | 0 | 0 |
| KALIMANTAN TIMUR | 0 | 0 | 2 | 2 | 0 | 0 |
| KALIMANTAN UTARA | 0 | 0 | 2 | 2 | 0 | 0 |
| SULAWESI UTARA | 0 | 0 | 2 | 2 | 0 | 0 |
| SULAWESI TENGAH | 0 | 0 | 2 | 2 | 0 | 0 |
| SULAWESI SELATAN | 0 | 0 | 2 | 2 | 2 | 2 |
| SULAWESI TENGGARA | 0 | 0 | 2 | 2 | 0 | 0 |
| GORONTALO | 0 | 0 | 2 | 2 | 0 | 0 |
| SULAWESI BARAT | 0 | 0 | 2 | 2 | 0 | 0 |
| MALUKU | 0 | 0 | 2 | 2 | 0 | 0 |
| MALUKU UTARA | 0 | 0 | 2 | 2 | 0 | 0 |
| PAPUA BARAT | 0 | 0 | 2 | 2 | 0 | 0 |
| PAPUA | 0 | 0 | 2 | 2 | 0 | 0 |
# Single-linkage agglomerative clustering into 3 clusters on principalComponents.
# The `affinity` keyword was deprecated in scikit-learn 1.2 and removed in 1.4;
# Euclidean distance is the default, so it is simply omitted here.
cluster3 = AgglomerativeClustering(n_clusters=3, linkage='single').fit_predict(principalComponents)
cluster3
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)
# Single-linkage agglomerative clustering into 4 clusters on principalComponents.
# The `affinity` keyword was deprecated in scikit-learn 1.2 and removed in 1.4;
# Euclidean distance is the default, so it is simply omitted here.
cluster4 = AgglomerativeClustering(n_clusters=4, linkage='single').fit_predict(principalComponents)
cluster4
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)
# Single-linkage agglomerative clustering into 5 clusters on principalComponents.
# The `affinity` keyword was deprecated in scikit-learn 1.2 and removed in 1.4;
# Euclidean distance is the default, so it is simply omitted here.
cluster5 = AgglomerativeClustering(n_clusters=5, linkage='single').fit_predict(principalComponents)
cluster5
array([0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)
# Dendrogram of the single-linkage hierarchy built on principalComponents,
# with a dashed horizontal reference line drawn at height 1.
plt.figure(figsize=(15, 7))
plt.title("Dendrogram")
single_linkage_tree = hierarchy.linkage(principalComponents, method='single')
dend = dendrogram(single_linkage_tree)
plt.axhline(y=1, color='black', linestyle='--')
plt.show()
# Single-linkage labels for k = 3, 4, 5, indexed like
# ProduksiBuah20_22_scaled_cluster.
SingleLinkage_Tabel = pd.DataFrame(
    {'k=3': cluster3, 'k=4': cluster4, 'k=5': cluster5},
    index=ProduksiBuah20_22_scaled_cluster.index,
)
SingleLinkage_Tabel
| k=3 | k=4 | k=5 | |
|---|---|---|---|
| Provinsi | |||
| ACEH | 0 | 0 | 0 |
| SUMATERA UTARA | 0 | 0 | 0 |
| SUMATERA BARAT | 0 | 0 | 4 |
| RIAU | 0 | 0 | 0 |
| JAMBI | 0 | 0 | 0 |
| SUMATERA SELATAN | 0 | 0 | 0 |
| BENGKULU | 0 | 0 | 0 |
| LAMPUNG | 0 | 0 | 0 |
| KEP. BANGKA BELITUNG | 0 | 0 | 0 |
| KEP. RIAU | 0 | 0 | 0 |
| DKI JAKARTA | 0 | 0 | 0 |
| JAWA BARAT | 0 | 3 | 3 |
| JAWA TENGAH | 2 | 2 | 2 |
| DI YOGYAKARTA | 0 | 0 | 0 |
| JAWA TIMUR | 1 | 1 | 1 |
| BANTEN | 0 | 0 | 0 |
| BALI | 0 | 0 | 0 |
| NUSA TENGGARA BARAT | 0 | 0 | 0 |
| NUSA TENGGARA TIMUR | 0 | 0 | 0 |
| KALIMANTAN BARAT | 0 | 0 | 0 |
| KALIMANTAN TENGAH | 0 | 0 | 0 |
| KALIMANTAN SELATAN | 0 | 0 | 0 |
| KALIMANTAN TIMUR | 0 | 0 | 0 |
| KALIMANTAN UTARA | 0 | 0 | 0 |
| SULAWESI UTARA | 0 | 0 | 0 |
| SULAWESI TENGAH | 0 | 0 | 0 |
| SULAWESI SELATAN | 0 | 0 | 0 |
| SULAWESI TENGGARA | 0 | 0 | 0 |
| GORONTALO | 0 | 0 | 0 |
| SULAWESI BARAT | 0 | 0 | 0 |
| MALUKU | 0 | 0 | 0 |
| MALUKU UTARA | 0 | 0 | 0 |
| PAPUA BARAT | 0 | 0 | 0 |
| PAPUA | 0 | 0 | 0 |
# Centroid of each of the 3 single-linkage clusters, obtained by fitting a
# NearestCentroid classifier on (principalComponents, cluster3).
clf3 = NearestCentroid().fit(principalComponents, cluster3)
slc3 = clf3.centroids_
print(slc3)
[[-0.17586741 -0.01699351 -0.02067896] [ 3.45358047 1.08084392 -0.17360785] [ 2.17417666 -0.5370515 0.83533448]]
# Centroid of each of the 4 single-linkage clusters, obtained by fitting a
# NearestCentroid classifier on (principalComponents, cluster4).
clf4 = NearestCentroid().fit(principalComponents, cluster4)
slc4 = clf4.centroids_
print(slc4)
[[-0.23303979 0.0154807 -0.00351477] [ 3.45358047 1.08084392 -0.17360785] [ 2.17417666 -0.5370515 0.83533448] [ 1.59647623 -1.02369398 -0.55276887]]
# Centroid of each of the 5 single-linkage clusters, obtained by fitting a
# NearestCentroid classifier on (principalComponents, cluster5).
clf5 = NearestCentroid().fit(principalComponents, cluster5)
slc5 = clf5.centroids_
print(slc5)
[[-0.25363976 0.02108769 0.02223899] [ 3.45358047 1.08084392 -0.17360785] [ 2.17417666 -0.5370515 0.83533448] [ 1.59647623 -1.02369398 -0.55276887] [ 0.38495947 -0.15272904 -0.77612743]]
# Array copies of the single-linkage centroids; these are later passed to
# KMeans as explicit initial centres.
slc3_, slc4_, slc5_ = (np.array(centroids) for centroids in (slc3, slc4, slc5))
# Hierarchical K-means, k=3: K-means started from the single-linkage
# centroids (slc3_).
# With an explicit init array scikit-learn performs exactly one initialisation
# regardless of n_init (and warns when n_init > 1), so n_init=1 states what
# actually runs. fit_predict() alone fits the model; the extra fit() is gone.
hkmeans3 = KMeans(n_clusters=3, init=slc3_, n_init=1, max_iter=300, random_state=42)
identified_clusters_hKmeans3 = hkmeans3.fit_predict(principalComponents)
identified_clusters_hKmeans3
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# Hierarchical K-means, k=4: K-means started from the single-linkage
# centroids (slc4_).
# With an explicit init array scikit-learn performs exactly one initialisation
# regardless of n_init (and warns when n_init > 1), so n_init=1 states what
# actually runs. fit_predict() alone fits the model; the extra fit() is gone.
hkmeans4 = KMeans(n_clusters=4, init=slc4_, n_init=1, max_iter=300, random_state=42)
identified_clusters_hKmeans4 = hkmeans4.fit_predict(principalComponents)
identified_clusters_hKmeans4
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 2, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# Hierarchical K-means, k=5: K-means started from the single-linkage
# centroids (slc5_).
# With an explicit init array scikit-learn performs exactly one initialisation
# regardless of n_init (and warns when n_init > 1), so n_init=1 states what
# actually runs. fit_predict() alone fits the model; the extra fit() is gone.
hkmeans5 = KMeans(n_clusters=5, init=slc5_, n_init=1, max_iter=300, random_state=42)
identified_clusters_hKmeans5 = hkmeans5.fit_predict(principalComponents)
identified_clusters_hKmeans5
array([0, 4, 4, 0, 0, 0, 0, 4, 0, 0, 0, 3, 2, 0, 1, 0, 0, 4, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
# Hierarchical K-means labels for k = 3, 4, 5, indexed like
# ProduksiBuah20_22_scaled_cluster.
Hierarchical_Kmeans_Tabel = pd.DataFrame(
    {
        'k=3 hk': identified_clusters_hKmeans3,
        'k=4 hk': identified_clusters_hKmeans4,
        'k=5 hk': identified_clusters_hKmeans5,
    },
    index=ProduksiBuah20_22_scaled_cluster.index,
)
Hierarchical_Kmeans_Tabel
| k=3 hk | k=4 hk | k=5 hk | |
|---|---|---|---|
| Provinsi | |||
| ACEH | 0 | 0 | 0 |
| SUMATERA UTARA | 0 | 0 | 4 |
| SUMATERA BARAT | 0 | 0 | 4 |
| RIAU | 0 | 0 | 0 |
| JAMBI | 0 | 0 | 0 |
| SUMATERA SELATAN | 0 | 0 | 0 |
| BENGKULU | 0 | 0 | 0 |
| LAMPUNG | 0 | 0 | 4 |
| KEP. BANGKA BELITUNG | 0 | 0 | 0 |
| KEP. RIAU | 0 | 0 | 0 |
| DKI JAKARTA | 0 | 0 | 0 |
| JAWA BARAT | 2 | 3 | 3 |
| JAWA TENGAH | 2 | 2 | 2 |
| DI YOGYAKARTA | 0 | 0 | 0 |
| JAWA TIMUR | 1 | 1 | 1 |
| BANTEN | 0 | 0 | 0 |
| BALI | 0 | 0 | 0 |
| NUSA TENGGARA BARAT | 0 | 0 | 4 |
| NUSA TENGGARA TIMUR | 0 | 0 | 0 |
| KALIMANTAN BARAT | 0 | 0 | 0 |
| KALIMANTAN TENGAH | 0 | 0 | 0 |
| KALIMANTAN SELATAN | 0 | 0 | 0 |
| KALIMANTAN TIMUR | 0 | 0 | 0 |
| KALIMANTAN UTARA | 0 | 0 | 0 |
| SULAWESI UTARA | 0 | 0 | 0 |
| SULAWESI TENGAH | 0 | 0 | 0 |
| SULAWESI SELATAN | 0 | 0 | 0 |
| SULAWESI TENGGARA | 0 | 0 | 0 |
| GORONTALO | 0 | 0 | 0 |
| SULAWESI BARAT | 0 | 0 | 0 |
| MALUKU | 0 | 0 | 0 |
| MALUKU UTARA | 0 | 0 | 0 |
| PAPUA BARAT | 0 | 0 | 0 |
| PAPUA | 0 | 0 | 0 |
# Append all three label tables column-wise to both working frames.
# NOTE(review): re-running this cell appends the label columns again.
cluster_label_tables = [Kmeans_Tabel, SingleLinkage_Tabel, Hierarchical_Kmeans_Tabel]
ProduksiBuah20_22_copy = pd.concat(
    [ProduksiBuah20_22_copy, *cluster_label_tables], axis=1
)
ProduksiBuah20_22_scaled_cluster = pd.concat(
    [ProduksiBuah20_22_scaled_cluster, *cluster_label_tables], axis=1
)
# K-means++ models for k = 3, 4, 5 fitted on ProduksiBuah20_22_scaled.
# Note that kmeans4 and kmeans5 are re-bound here to these new estimators.
kmeans_pp_options = dict(init='k-means++', n_init=10, max_iter=300, random_state=42)
kmeans3 = KMeans(n_clusters=3, **kmeans_pp_options).fit(ProduksiBuah20_22_scaled)
kmeans4 = KMeans(n_clusters=4, **kmeans_pp_options).fit(ProduksiBuah20_22_scaled)
kmeans5 = KMeans(n_clusters=5, **kmeans_pp_options).fit(ProduksiBuah20_22_scaled)
# Trailing expression keeps the estimator repr as the displayed cell result.
kmeans5
KMeans(n_clusters=5, random_state=42)
# Calinski-Harabasz index for k = 3, 4, 5 under each clustering method.
# Every model is fitted on principalComponents and its labels are scored
# against psc (both defined earlier in the notebook).
# NOTE(review): fitting and scoring use different arrays - confirm intended.

# K-means with k-means++ seeding.
resultsk = {}
for n_k in range(3, 6):
    km_labels = KMeans(
        n_clusters=n_k, init='k-means++', n_init=10, max_iter=300, random_state=42
    ).fit_predict(principalComponents)
    resultsk[n_k] = calinski_harabasz_score(psc, km_labels)

# K-means with random seeding.
resultsr = {}
for n_k in range(3, 6):
    km_labels = KMeans(
        n_clusters=n_k, init='random', n_init=10, max_iter=300, random_state=42
    ).fit_predict(principalComponents)
    resultsr[n_k] = calinski_harabasz_score(psc, km_labels)

# Hierarchical K-means: K-means started from the single-linkage centroids.
# An explicit init array means a single initialisation, hence n_init=1.
# (The former `for i in range(1, 2)` wrapper ran exactly once and is removed.)
resultshk = {}
for n_k, start_centres in ((3, slc3_), (4, slc4_), (5, slc5_)):
    km_labels = KMeans(
        n_clusters=n_k, init=start_centres, n_init=1, max_iter=300, random_state=42
    ).fit_predict(principalComponents)
    resultshk[n_k] = calinski_harabasz_score(psc, km_labels)

# Single-linkage agglomerative clustering. `affinity` was removed in
# scikit-learn 1.4; Euclidean distance is the default.
resultssl = {}
for n_k in range(3, 6):
    sl_labels = AgglomerativeClustering(
        n_clusters=n_k, linkage='single'
    ).fit_predict(principalComponents)
    resultssl[n_k] = calinski_harabasz_score(psc, sl_labels)
# Line plot of the Calinski-Harabasz index against the number of clusters,
# one line per clustering method.
sns.set(rc={"figure.figsize": (10, 6)})
for scores_by_k, method_name in (
    (resultsk, 'K-Means++'),
    (resultsr, 'Random'),
    (resultshk, 'Hierarchical K-Means'),
    (resultssl, 'Single Linkage'),
):
    plt.plot(list(scores_by_k.keys()), list(scores_by_k.values()), label=method_name)
plt.title('Calinski Harabasz Score 2020-2022')
plt.xlabel("Number of clusters")
plt.ylabel("Calinski-Harabasz Index")
plt.legend()
plt.show()
# Calinski-Harabasz index of the stored K-means and hierarchical K-means
# labellings, scored against psc.
ch_index_kmeans3_i = calinski_harabasz_score(psc, Kmeans_Tabel['k=3 kmeans++'])
ch_index_kmeans4_i = calinski_harabasz_score(psc, Kmeans_Tabel['k=4 kmeans++'])
ch_index_kmeans5_i = calinski_harabasz_score(psc, Kmeans_Tabel['k=5 kmeans++'])
ch_index_kmeans3_r = calinski_harabasz_score(psc, Kmeans_Tabel['k=3 random'])
ch_index_kmeans4_r = calinski_harabasz_score(psc, Kmeans_Tabel['k=4 random'])
ch_index_kmeans5_r = calinski_harabasz_score(psc, Kmeans_Tabel['k=5 random'])
ch_index_hkmeans3 = calinski_harabasz_score(psc, Hierarchical_Kmeans_Tabel['k=3 hk'])
ch_index_hkmeans4 = calinski_harabasz_score(psc, Hierarchical_Kmeans_Tabel['k=4 hk'])
ch_index_hkmeans5 = calinski_harabasz_score(psc, Hierarchical_Kmeans_Tabel['k=5 hk'])
print('Calinski Harabasz Index K-means 3 kmeans++:', ch_index_kmeans3_i)
print('Calinski Harabasz Index K-means 4 kmeans++:', ch_index_kmeans4_i)
print('Calinski Harabasz Index K-means 5 kmeans++:', ch_index_kmeans5_i)
# The k=4 and k=5 random-init lines were previously both labelled "3 random".
print('Calinski Harabasz Index K-means 3 random:', ch_index_kmeans3_r)
print('Calinski Harabasz Index K-means 4 random:', ch_index_kmeans4_r)
print('Calinski Harabasz Index K-means 5 random:', ch_index_kmeans5_r)
print('Calinski Harabasz Index HK-means 3:', ch_index_hkmeans3)
print('Calinski Harabasz Index HK-means 4:', ch_index_hkmeans4)
print('Calinski Harabasz Index HK-means 5:', ch_index_hkmeans5)
Calinski Harabasz Index K-means 3 kmeans++: 30.27567892886422 Calinski Harabasz Index K-means 4 kmeans++: 28.756485814113994 Calinski Harabasz Index K-means 5 kmeans++: 27.523183992221334 Calinski Harabasz Index K-means 3 random: 25.64022381449592 Calinski Harabasz Index K-means 3 random: 19.29857643823079 Calinski Harabasz Index K-means 3 random: 24.12419255437759 Calinski Harabasz Index HK-means 3: 30.27567892886422 Calinski Harabasz Index HK-means 4: 24.59244988312941 Calinski Harabasz Index HK-means 5: 27.523183992221334
# Calinski-Harabasz index of the single-linkage labellings, scored against psc.
ch_index_SingleLinkage3, ch_index_SingleLinkage4, ch_index_SingleLinkage5 = (
    calinski_harabasz_score(psc, sl_labels)
    for sl_labels in (cluster3, cluster4, cluster5)
)
for caption, ch_value in (
    ('Calinski Harabasz Index Single Linkage 3:', ch_index_SingleLinkage3),
    ('Calinski Harabasz Index Single Linkage 4:', ch_index_SingleLinkage4),
    ('Calinski Harabasz Index Single Linkage 5:', ch_index_SingleLinkage5),
):
    print(caption, ch_value)
Calinski Harabasz Index Single Linkage 3: 20.493703416896846 Calinski Harabasz Index Single Linkage 4: 24.59244988312941 Calinski Harabasz Index Single Linkage 5: 22.101845709584115
# Silhouette coefficient for k = 3, 4, 5 under each clustering method.
# Every model is fitted on principalComponents and its labels are scored
# against psc (both defined earlier in the notebook).
# NOTE(review): fitting and scoring use different arrays - confirm intended.

# K-means with k-means++ seeding.
resultsk = {}
for n_k in range(3, 6):
    km_labels = KMeans(
        n_clusters=n_k, init='k-means++', n_init=10, max_iter=300, random_state=42
    ).fit_predict(principalComponents)
    resultsk[n_k] = silhouette_score(psc, km_labels)

# K-means with random seeding.
resultsr = {}
for n_k in range(3, 6):
    km_labels = KMeans(
        n_clusters=n_k, init='random', n_init=10, max_iter=300, random_state=42
    ).fit_predict(principalComponents)
    resultsr[n_k] = silhouette_score(psc, km_labels)

# Hierarchical K-means: K-means started from the single-linkage centroids.
# An explicit init array means a single initialisation, hence n_init=1.
# (The former `for i in range(1, 2)` wrapper ran exactly once and is removed.)
resultshk = {}
for n_k, start_centres in ((3, slc3_), (4, slc4_), (5, slc5_)):
    km_labels = KMeans(
        n_clusters=n_k, init=start_centres, n_init=1, max_iter=300, random_state=42
    ).fit_predict(principalComponents)
    resultshk[n_k] = silhouette_score(psc, km_labels)

# Single-linkage agglomerative clustering. `affinity` was removed in
# scikit-learn 1.4; Euclidean distance is the default.
resultssl = {}
for n_k in range(3, 6):
    sl_labels = AgglomerativeClustering(
        n_clusters=n_k, linkage='single'
    ).fit_predict(principalComponents)
    resultssl[n_k] = silhouette_score(psc, sl_labels)
# Line plot of the silhouette coefficient against the number of clusters,
# one line per clustering method.
sns.set(rc={"figure.figsize": (10, 6)})
for scores_by_k, method_name in (
    (resultsk, 'K-Means++'),
    (resultsr, 'Random'),
    (resultshk, 'Hierarchical K-Means'),
    (resultssl, 'Single Linkage'),
):
    plt.plot(list(scores_by_k.keys()), list(scores_by_k.values()), label=method_name)
plt.title('Silhouette Coefficient Score 2020-2022')
plt.xlabel("Number of clusters")
plt.ylabel("Silhouette Coefficient Index")
plt.legend()
plt.show()
# Silhouette coefficient (Euclidean metric) of the stored K-means and
# hierarchical K-means labellings, scored against psc.
score3_i, score4_i, score5_i = (
    silhouette_score(psc, Kmeans_Tabel[col], metric='euclidean')
    for col in ('k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++')
)
score3_r, score4_r, score5_r = (
    silhouette_score(psc, Kmeans_Tabel[col], metric='euclidean')
    for col in ('k=3 random', 'k=4 random', 'k=5 random')
)
hscore3, hscore4, hscore5 = (
    silhouette_score(psc, hk_labels, metric='euclidean')
    for hk_labels in (
        identified_clusters_hKmeans3,
        identified_clusters_hKmeans4,
        identified_clusters_hKmeans5,
    )
)
for caption, sil_value in (
    ('Silhouette coefficient K-means 3 k-means++:', score3_i),
    ('Silhouette coefficient K-means 4 k-means++:', score4_i),
    ('Silhouette coefficient K-means 5 k-means++:', score5_i),
    ('Silhouette coefficient K-means 3 random:', score3_r),
    ('Silhouette coefficient K-means 4 random:', score4_r),
    ('Silhouette coefficient K-means 5 random:', score5_r),
    ('Silhouette coefficient HK-means 3:', hscore3),
    ('Silhouette coefficient HK-means 4:', hscore4),
    ('Silhouette coefficient HK-means 5:', hscore5),
):
    print(caption, sil_value)
Silhouette coefficient K-means 3 k-means++: 0.6745383942607401 Silhouette coefficient K-means 4 k-means++: 0.5024000913928874 Silhouette coefficient K-means 5 k-means++: 0.4941079464595051 Silhouette coefficient K-means 3 random: 0.4991462727828761 Silhouette coefficient K-means 4 random: 0.2957044656981662 Silhouette coefficient K-means 5 random: 0.3073022491918449 Silhouette coefficient HK-means 3: 0.6745383942607401 Silhouette coefficient HK-means 4: 0.6408219489733553 Silhouette coefficient HK-means 5: 0.4941079464595051
# Silhouette coefficient (Euclidean metric) of the single-linkage labellings
# cluster3/4/5, scored against psc.
scores3 = silhouette_score(psc, cluster3, metric='euclidean')
scores4 = silhouette_score(psc, cluster4, metric='euclidean')
scores5 = silhouette_score(psc, cluster5, metric='euclidean')
# These are single-linkage results; they were previously printed as "K-means".
print('Silhouette coefficient Single Linkage 3:', scores3)
print('Silhouette coefficient Single Linkage 4:', scores4)
print('Silhouette coefficient Single Linkage 5:', scores5)
Silhouette coefficient K-means 3: 0.6560560906881828 Silhouette coefficient K-means 4: 0.6408219489733553 Silhouette coefficient K-means 5: 0.47206141916418703
# Calinski-Harabasz index of the stored K-means and hierarchical K-means
# labellings, this time scored against pb. The ch_index_* names from the
# psc-based evaluation are overwritten.
ch_index_kmeans3_i = calinski_harabasz_score(pb, Kmeans_Tabel['k=3 kmeans++'])
ch_index_kmeans4_i = calinski_harabasz_score(pb, Kmeans_Tabel['k=4 kmeans++'])
ch_index_kmeans5_i = calinski_harabasz_score(pb, Kmeans_Tabel['k=5 kmeans++'])
ch_index_kmeans3_r = calinski_harabasz_score(pb, Kmeans_Tabel['k=3 random'])
ch_index_kmeans4_r = calinski_harabasz_score(pb, Kmeans_Tabel['k=4 random'])
ch_index_kmeans5_r = calinski_harabasz_score(pb, Kmeans_Tabel['k=5 random'])
ch_index_hkmeans3 = calinski_harabasz_score(pb, Hierarchical_Kmeans_Tabel['k=3 hk'])
ch_index_hkmeans4 = calinski_harabasz_score(pb, Hierarchical_Kmeans_Tabel['k=4 hk'])
ch_index_hkmeans5 = calinski_harabasz_score(pb, Hierarchical_Kmeans_Tabel['k=5 hk'])
print('Calinski Harabasz Index K-means 3 kmeans++:', ch_index_kmeans3_i)
print('Calinski Harabasz Index K-means 4 kmeans++:', ch_index_kmeans4_i)
print('Calinski Harabasz Index K-means 5 kmeans++:', ch_index_kmeans5_i)
# The k=4 and k=5 random-init lines were previously both labelled "3 random".
print('Calinski Harabasz Index K-means 3 random:', ch_index_kmeans3_r)
print('Calinski Harabasz Index K-means 4 random:', ch_index_kmeans4_r)
print('Calinski Harabasz Index K-means 5 random:', ch_index_kmeans5_r)
print('Calinski Harabasz Index HK-means 3:', ch_index_hkmeans3)
print('Calinski Harabasz Index HK-means 4:', ch_index_hkmeans4)
print('Calinski Harabasz Index HK-means 5:', ch_index_hkmeans5)
Calinski Harabasz Index K-means 3 kmeans++: 57.982645118375565 Calinski Harabasz Index K-means 4 kmeans++: 49.69512635670209 Calinski Harabasz Index K-means 5 kmeans++: 41.93869884583579 Calinski Harabasz Index K-means 3 random: 29.186360201480685 Calinski Harabasz Index K-means 3 random: 16.58936536079578 Calinski Harabasz Index K-means 3 random: 39.53415387813175 Calinski Harabasz Index HK-means 3: 57.98264511837555 Calinski Harabasz Index HK-means 4: 42.40873814090654 Calinski Harabasz Index HK-means 5: 41.93869884583579
# Calinski-Harabasz index of the single-linkage labellings, scored against pb
# like every other score in this part of the notebook. The previous version
# still passed psc here, which merely reproduced the earlier psc-based values.
ch_index_SingleLinkage3 = calinski_harabasz_score(pb, cluster3)
ch_index_SingleLinkage4 = calinski_harabasz_score(pb, cluster4)
ch_index_SingleLinkage5 = calinski_harabasz_score(pb, cluster5)
print('Calinski Harabasz Index Single Linkage 3:', ch_index_SingleLinkage3)
print('Calinski Harabasz Index Single Linkage 4:', ch_index_SingleLinkage4)
print('Calinski Harabasz Index Single Linkage 5:', ch_index_SingleLinkage5)
Calinski Harabasz Index Single Linkage 3: 20.493703416896846 Calinski Harabasz Index Single Linkage 4: 24.59244988312941 Calinski Harabasz Index Single Linkage 5: 22.101845709584115
# Silhouette coefficient (Euclidean metric) of the stored K-means and
# hierarchical K-means labellings, scored against pb. The score*/hscore*
# names from the psc-based evaluation are overwritten.
score3_i, score4_i, score5_i = (
    silhouette_score(pb, Kmeans_Tabel[col], metric='euclidean')
    for col in ('k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++')
)
score3_r, score4_r, score5_r = (
    silhouette_score(pb, Kmeans_Tabel[col], metric='euclidean')
    for col in ('k=3 random', 'k=4 random', 'k=5 random')
)
hscore3, hscore4, hscore5 = (
    silhouette_score(pb, hk_labels, metric='euclidean')
    for hk_labels in (
        identified_clusters_hKmeans3,
        identified_clusters_hKmeans4,
        identified_clusters_hKmeans5,
    )
)
for caption, sil_value in (
    ('Silhouette coefficient K-means 3 k-means++:', score3_i),
    ('Silhouette coefficient K-means 4 k-means++:', score4_i),
    ('Silhouette coefficient K-means 5 k-means++:', score5_i),
    ('Silhouette coefficient K-means 3 random:', score3_r),
    ('Silhouette coefficient K-means 4 random:', score4_r),
    ('Silhouette coefficient K-means 5 random:', score5_r),
    ('Silhouette coefficient HK-means 3:', hscore3),
    ('Silhouette coefficient HK-means 4:', hscore4),
    ('Silhouette coefficient HK-means 5:', hscore5),
):
    print(caption, sil_value)
Silhouette coefficient K-means 3 k-means++: 0.726621062844469 Silhouette coefficient K-means 4 k-means++: 0.5201142248204489 Silhouette coefficient K-means 5 k-means++: 0.4986093315415186 Silhouette coefficient K-means 3 random: 0.5105054377850797 Silhouette coefficient K-means 4 random: 0.23293788852406308 Silhouette coefficient K-means 5 random: 0.25895015082520845 Silhouette coefficient HK-means 3: 0.726621062844469 Silhouette coefficient HK-means 4: 0.6751365567576909 Silhouette coefficient HK-means 5: 0.4986093315415186
# Silhouette coefficient (Euclidean metric) of the single-linkage labellings
# cluster3/4/5, scored against pb.
scores3 = silhouette_score(pb, cluster3, metric='euclidean')
scores4 = silhouette_score(pb, cluster4, metric='euclidean')
scores5 = silhouette_score(pb, cluster5, metric='euclidean')
# These are single-linkage results; they were previously printed as "K-means".
print('Silhouette coefficient Single Linkage 3:', scores3)
print('Silhouette coefficient Single Linkage 4:', scores4)
print('Silhouette coefficient Single Linkage 5:', scores5)
Silhouette coefficient K-means 3: 0.6340872807029864 Silhouette coefficient K-means 4: 0.6751365567576909 Silhouette coefficient K-means 5: 0.12120922846162763
class Radar:
    """Radar (spider) chart drawn on polar axes stacked inside one figure.

    One polar axes is added to the figure per entry of ``title``, all sharing
    the same rectangle. Only the first axes (``self.ax``) carries the angular
    grid labels, and it is the axes that ``plot`` draws on.
    """

    def __init__(self, figure, title, labels, rect=None):
        """Create the stacked polar axes.

        figure: object whose ``add_axes`` is called (a matplotlib figure in
                this notebook).
        title:  sequence of names; one spoke per entry, shown around the circle.
        labels: iterable used only to bound how many axes get their polar
                spine restyled (it is zipped with the axes and angles).
        rect:   rectangle handed to ``add_axes``; defaults to
                [0.05, 0.05, 0.9, 0.9] when None.
        """
        rect = [0.05, 0.05, 0.9, 0.9] if rect is None else rect
        self.n = len(title)
        # Spoke angles in degrees, evenly spaced starting at 0.
        self.angles = np.arange(0, 360, 360.0 / self.n)
        self.axes = [
            figure.add_axes(rect, projection='polar', label='axes%d' % idx)
            for idx in range(self.n)
        ]
        self.ax = self.axes[0]
        # Feature names
        self.ax.set_thetagrids(
            self.angles, labels=title, fontsize=14, backgroundcolor="white", zorder=999
        )
        self.ax.set_yticklabels([])
        # Every axes except the first: hide the angular axis and radial tick
        # labels and push the axes behind the first one.
        for extra_ax in self.axes[1:]:
            extra_ax.xaxis.set_visible(False)
            extra_ax.set_yticklabels([])
            extra_ax.set_zorder(-99)
        # zip stops at the shortest of axes / angles / labels.
        for styled_ax, _angle, _label in zip(self.axes, self.angles, labels):
            polar_spine = styled_ax.spines['polar']
            polar_spine.set_color('black')
            polar_spine.set_zorder(-99)

    def plot(self, values, *args, **kw):
        """Draw ``values`` (one per spoke) as a closed outline and fill it.

        Extra positional and keyword arguments are forwarded to both the line
        and the fill; the fill gets the label '_noLabel'.
        """
        # Repeat the first point at the end so the polygon closes.
        closed_angles = np.deg2rad(np.append(self.angles, self.angles[0]))
        closed_values = np.append(values, values[0])
        self.ax.plot(closed_angles, closed_values, *args, **kw)
        kw['label'] = '_noLabel'
        self.ax.fill(closed_angles, closed_values, *args, **kw)
# Colour palette for the radar charts: one hex colour per cluster, looked up by
# cluster index. Six colours are defined; add more entries here to draw more
# than six clusters.
cluster_colors = ['#b4d2b1', '#568f8b', '#1d4a60', '#cd7e59', '#ddb247', '#d15252']
# Per-cluster column means under the 'k=3 kmeans++' labelling, next to the
# overall column mean, for ProduksiBuah20_22_copy (X_mean3_i) and
# ProduksiBuah20_22_scaled_cluster (X_std_mean3_i). The *_dev_rel frames hold
# each cluster's relative deviation from the overall mean in percent.
# Label columns are removed from the rows, and the 'mean' column is dropped.
_label_rows = ['k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
               'k=3 random', 'k=4 random', 'k=5 random',
               'k=3', 'k=4', 'k=5', 'k=3 hk', 'k=4 hk', 'k=5 hk']


def _rel_dev_pct(row):
    # (value - mean) / mean, rounded to 2 decimals, expressed in percent.
    return ((row - row['mean']) / row['mean']).round(2) * 100


X_mean3_i = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=3 kmeans++').mean().T,
    ],
    axis=1,
)
X_dev_rel3_i = X_mean3_i.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_mean3_i = X_mean3_i.drop(columns=['mean'], index=_label_rows)

X_std_mean3_i = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=3 kmeans++').mean().T,
    ],
    axis=1,
)
X_std_dev_rel3_i = X_std_mean3_i.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_std_mean3_i = X_std_mean3_i.drop(columns=['mean'], index=_label_rows)
# Radar chart of X_std_mean3_i: one polygon per cluster. kmeans3 supplies the
# spoke names (feature_names_in_) and the number of clusters.
fig = plt.figure(figsize=(8, 8))
spoke_names = kmeans3.feature_names_in_
no_features = len(spoke_names)
radar = Radar(fig, spoke_names, np.unique(kmeans3.labels_))
for cluster_id in range(kmeans3.n_clusters):
    cluster_means = X_std_mean3_i[cluster_id].tolist()
    radar.plot(
        cluster_means,
        '-',
        lw=2,
        color=cluster_colors[cluster_id],
        alpha=0.7,
        label='cluster {}'.format(cluster_id),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 3 kmeans++: Feature means per cluster", size=22, pad=60)
plt.show()
# Per-cluster column means under the 'k=4 kmeans++' labelling, next to the
# overall column mean, for ProduksiBuah20_22_copy (X_mean4_i) and
# ProduksiBuah20_22_scaled_cluster (X_std_mean4_i). The *_dev_rel frames hold
# each cluster's relative deviation from the overall mean in percent.
# Label columns are removed from the rows, and the 'mean' column is dropped.
_label_rows = ['k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
               'k=3 random', 'k=4 random', 'k=5 random',
               'k=3', 'k=4', 'k=5', 'k=3 hk', 'k=4 hk', 'k=5 hk']


def _rel_dev_pct(row):
    # (value - mean) / mean, rounded to 2 decimals, expressed in percent.
    return ((row - row['mean']) / row['mean']).round(2) * 100


X_mean4_i = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=4 kmeans++').mean().T,
    ],
    axis=1,
)
X_dev_rel4_i = X_mean4_i.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_mean4_i = X_mean4_i.drop(columns=['mean'], index=_label_rows)

X_std_mean4_i = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=4 kmeans++').mean().T,
    ],
    axis=1,
)
X_std_dev_rel4_i = X_std_mean4_i.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_std_mean4_i = X_std_mean4_i.drop(columns=['mean'], index=_label_rows)
# Radar chart of X_std_mean4_i: one polygon per cluster. kmeans4 supplies the
# spoke names (feature_names_in_) and the number of clusters.
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans4.feature_names_in_)
# The label set must come from kmeans4 as well; it was previously taken from
# kmeans3, so only three of the four cluster labels were handed to Radar.
radar = Radar(fig, kmeans4.feature_names_in_, np.unique(kmeans4.labels_))
for k in range(kmeans4.n_clusters):
    cluster_data = X_std_mean4_i[k].values.tolist()
    radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))
radar.ax.legend()
radar.ax.set_title("Cluster 4 kmeans++: Feature means per cluster", size=22, pad=60)
plt.show()
# Per-cluster column means under the 'k=5 kmeans++' labelling, next to the
# overall column mean, for ProduksiBuah20_22_copy (X_mean5_i) and
# ProduksiBuah20_22_scaled_cluster (X_std_mean5_i). The *_dev_rel frames hold
# each cluster's relative deviation from the overall mean in percent.
# Label columns are removed from the rows, and the 'mean' column is dropped.
_label_rows = ['k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
               'k=3 random', 'k=4 random', 'k=5 random',
               'k=3', 'k=4', 'k=5', 'k=3 hk', 'k=4 hk', 'k=5 hk']


def _rel_dev_pct(row):
    # (value - mean) / mean, rounded to 2 decimals, expressed in percent.
    return ((row - row['mean']) / row['mean']).round(2) * 100


X_mean5_i = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=5 kmeans++').mean().T,
    ],
    axis=1,
)
X_dev_rel5_i = X_mean5_i.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_mean5_i = X_mean5_i.drop(columns=['mean'], index=_label_rows)

X_std_mean5_i = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=5 kmeans++').mean().T,
    ],
    axis=1,
)
X_std_dev_rel5_i = X_std_mean5_i.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_std_mean5_i = X_std_mean5_i.drop(columns=['mean'], index=_label_rows)
# Radar chart of X_std_mean5_i: one polygon per cluster. kmeans5 supplies the
# spoke names (feature_names_in_) and the number of clusters.
fig = plt.figure(figsize=(8, 8))
spoke_names = kmeans5.feature_names_in_
no_features = len(spoke_names)
radar = Radar(fig, spoke_names, np.unique(kmeans5.labels_))
for cluster_id in range(kmeans5.n_clusters):
    cluster_means = X_std_mean5_i[cluster_id].tolist()
    radar.plot(
        cluster_means,
        '-',
        lw=2,
        color=cluster_colors[cluster_id],
        alpha=0.7,
        label='cluster {}'.format(cluster_id),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 5 kmeans++: Feature means per cluster", size=22, pad=60)
plt.show()
# Per-cluster column means under the 'k=3 random' labelling, next to the
# overall column mean, for ProduksiBuah20_22_copy (X_mean3_r) and
# ProduksiBuah20_22_scaled_cluster (X_std_mean3_r). The *_dev_rel frames hold
# each cluster's relative deviation from the overall mean in percent.
# Label columns are removed from the rows, and the 'mean' column is dropped.
_label_rows = ['k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
               'k=3 random', 'k=4 random', 'k=5 random',
               'k=3', 'k=4', 'k=5', 'k=3 hk', 'k=4 hk', 'k=5 hk']


def _rel_dev_pct(row):
    # (value - mean) / mean, rounded to 2 decimals, expressed in percent.
    return ((row - row['mean']) / row['mean']).round(2) * 100


X_mean3_r = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=3 random').mean().T,
    ],
    axis=1,
)
X_dev_rel3_r = X_mean3_r.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_mean3_r = X_mean3_r.drop(columns=['mean'], index=_label_rows)

X_std_mean3_r = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=3 random').mean().T,
    ],
    axis=1,
)
X_std_dev_rel3_r = X_std_mean3_r.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_std_mean3_r = X_std_mean3_r.drop(columns=['mean'], index=_label_rows)
# Radar chart of X_std_mean3_r (random-init labelling): one polygon per
# cluster. kmeans3 is used only for the spoke names, its label set and the
# cluster count; the plotted values come from X_std_mean3_r.
fig = plt.figure(figsize=(8, 8))
spoke_names = kmeans3.feature_names_in_
no_features = len(spoke_names)
radar = Radar(fig, spoke_names, np.unique(kmeans3.labels_))
for cluster_id in range(kmeans3.n_clusters):
    cluster_means = X_std_mean3_r[cluster_id].tolist()
    radar.plot(
        cluster_means,
        '-',
        lw=2,
        color=cluster_colors[cluster_id],
        alpha=0.7,
        label='cluster {}'.format(cluster_id),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 3 random: Feature means per cluster", size=22, pad=60)
plt.show()
# Per-cluster column means under the 'k=4 random' labelling, next to the
# overall column mean, for ProduksiBuah20_22_copy (X_mean4_r) and
# ProduksiBuah20_22_scaled_cluster (X_std_mean4_r). The *_dev_rel frames hold
# each cluster's relative deviation from the overall mean in percent.
# Label columns are removed from the rows, and the 'mean' column is dropped.
_label_rows = ['k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
               'k=3 random', 'k=4 random', 'k=5 random',
               'k=3', 'k=4', 'k=5', 'k=3 hk', 'k=4 hk', 'k=5 hk']


def _rel_dev_pct(row):
    # (value - mean) / mean, rounded to 2 decimals, expressed in percent.
    return ((row - row['mean']) / row['mean']).round(2) * 100


X_mean4_r = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=4 random').mean().T,
    ],
    axis=1,
)
X_dev_rel4_r = X_mean4_r.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_mean4_r = X_mean4_r.drop(columns=['mean'], index=_label_rows)

X_std_mean4_r = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=4 random').mean().T,
    ],
    axis=1,
)
X_std_dev_rel4_r = X_std_mean4_r.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_std_mean4_r = X_std_mean4_r.drop(columns=['mean'], index=_label_rows)
# Radar chart of X_std_mean4_r (random-init labelling): one polygon per
# cluster. kmeans4 is used only for the spoke names, its label set and the
# cluster count; the plotted values come from X_std_mean4_r.
fig = plt.figure(figsize=(8, 8))
spoke_names = kmeans4.feature_names_in_
no_features = len(spoke_names)
radar = Radar(fig, spoke_names, np.unique(kmeans4.labels_))
for cluster_id in range(kmeans4.n_clusters):
    cluster_means = X_std_mean4_r[cluster_id].tolist()
    radar.plot(
        cluster_means,
        '-',
        lw=2,
        color=cluster_colors[cluster_id],
        alpha=0.7,
        label='cluster {}'.format(cluster_id),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 4 random: Feature means per cluster", size=22, pad=60)
plt.show()
# Per-cluster column means under the 'k=5 random' labelling, next to the
# overall column mean, for ProduksiBuah20_22_copy (X_mean5_r) and
# ProduksiBuah20_22_scaled_cluster (X_std_mean5_r). The *_dev_rel frames hold
# each cluster's relative deviation from the overall mean in percent.
# Label columns are removed from the rows, and the 'mean' column is dropped.
_label_rows = ['k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
               'k=3 random', 'k=4 random', 'k=5 random',
               'k=3', 'k=4', 'k=5', 'k=3 hk', 'k=4 hk', 'k=5 hk']


def _rel_dev_pct(row):
    # (value - mean) / mean, rounded to 2 decimals, expressed in percent.
    return ((row - row['mean']) / row['mean']).round(2) * 100


X_mean5_r = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=5 random').mean().T,
    ],
    axis=1,
)
X_dev_rel5_r = X_mean5_r.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_mean5_r = X_mean5_r.drop(columns=['mean'], index=_label_rows)

X_std_mean5_r = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=5 random').mean().T,
    ],
    axis=1,
)
X_std_dev_rel5_r = X_std_mean5_r.apply(_rel_dev_pct, axis=1).drop(columns=['mean'], index=_label_rows)
X_std_mean5_r = X_std_mean5_r.drop(columns=['mean'], index=_label_rows)
fig = plt.figure(figsize=(8, 8))
no_features = len(kmeans5.feature_names_in_)
radar = Radar(fig, kmeans5.feature_names_in_, np.unique(kmeans5.labels_))
for k in range(0,kmeans5.n_clusters):
cluster_data = X_std_mean5_r[k].values.tolist()
radar.plot(cluster_data, '-', lw=2, color=cluster_colors[k], alpha=0.7, label='cluster {}'.format(k))
radar.ax.legend()
radar.ax.set_title("Cluster 5 random: Feature means per cluster", size=22, pad=60)
plt.show()
# ---------------------------------------------------------------------------
# "k=3 hk" (hierarchical kmeans) clustering: per-cluster feature means and
# radar chart.
# ---------------------------------------------------------------------------
# Cluster-assignment columns. After the transpose/concat below they show up as
# rows, and they are removed so that only feature rows remain.
_label_cols = [
    'k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
    'k=3 random', 'k=4 random', 'k=5 random',
    'k=3', 'k=4', 'k=5',
    'k=3 hk', 'k=4 hk', 'k=5 hk',
]

# Unscaled data: overall column mean ('mean') beside the mean of every
# 'k=3 hk' group.
_raw_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=3 hk').mean().T,
    ],
    axis=1,
)
_raw_overall = _raw_summary['mean']
# Relative deviation of each group mean from the overall mean, rounded to two
# decimals and then multiplied by 100.
X_dev_rel3_hk = (
    _raw_summary.sub(_raw_overall, axis=0).div(_raw_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_mean3_hk = _raw_summary.drop(columns=['mean'], index=_label_cols)

# Same two tables for the scaled data.
# NOTE(review): the ratio divides by the overall mean of the scaled frame; if
# that mean is close to 0 (e.g. z-scored features) the values blow up --
# confirm which scaler produced ProduksiBuah20_22_scaled_cluster.
_std_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=3 hk').mean().T,
    ],
    axis=1,
)
_std_overall = _std_summary['mean']
X_std_dev_rel3_hk = (
    _std_summary.sub(_std_overall, axis=0).div(_std_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_std_mean3_hk = _std_summary.drop(columns=['mean'], index=_label_cols)

# Radar chart of the scaled per-cluster means. The axes (feature rows) and the
# clusters (columns) are read from X_std_mean3_hk itself rather than from the
# separately fitted kmeans3 estimator, so the axis labels and legend entries
# always match the values that are drawn.
fig = plt.figure(figsize=(8, 8))
_features = X_std_mean3_hk.index.values
_clusters = X_std_mean3_hk.columns.values
no_features = len(_features)
radar = Radar(fig, _features, _clusters)
# Colours are picked by position, so cluster_colors needs at least as many
# entries as there are clusters.
for _pos, k in enumerate(_clusters):
    cluster_data = X_std_mean3_hk[k].values.tolist()
    radar.plot(
        cluster_data, '-',
        lw=2, color=cluster_colors[_pos], alpha=0.7, label='cluster {}'.format(k),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 3 hierarchical kmeans: Feature means per cluster", size=22, pad=60)
plt.show()
# ---------------------------------------------------------------------------
# "k=4 hk" (hierarchical kmeans) clustering: per-cluster feature means and
# radar chart.
# ---------------------------------------------------------------------------
# Cluster-assignment columns. After the transpose/concat below they show up as
# rows, and they are removed so that only feature rows remain.
_label_cols = [
    'k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
    'k=3 random', 'k=4 random', 'k=5 random',
    'k=3', 'k=4', 'k=5',
    'k=3 hk', 'k=4 hk', 'k=5 hk',
]

# Unscaled data: overall column mean ('mean') beside the mean of every
# 'k=4 hk' group.
_raw_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=4 hk').mean().T,
    ],
    axis=1,
)
_raw_overall = _raw_summary['mean']
# Relative deviation of each group mean from the overall mean, rounded to two
# decimals and then multiplied by 100.
X_dev_rel4_hk = (
    _raw_summary.sub(_raw_overall, axis=0).div(_raw_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_mean4_hk = _raw_summary.drop(columns=['mean'], index=_label_cols)

# Same two tables for the scaled data.
# NOTE(review): the ratio divides by the overall mean of the scaled frame; if
# that mean is close to 0 (e.g. z-scored features) the values blow up --
# confirm which scaler produced ProduksiBuah20_22_scaled_cluster.
_std_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=4 hk').mean().T,
    ],
    axis=1,
)
_std_overall = _std_summary['mean']
X_std_dev_rel4_hk = (
    _std_summary.sub(_std_overall, axis=0).div(_std_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_std_mean4_hk = _std_summary.drop(columns=['mean'], index=_label_cols)

# Radar chart of the scaled per-cluster means. The axes (feature rows) and the
# clusters (columns) are read from X_std_mean4_hk itself rather than from the
# separately fitted kmeans4 estimator, so the axis labels and legend entries
# always match the values that are drawn.
fig = plt.figure(figsize=(8, 8))
_features = X_std_mean4_hk.index.values
_clusters = X_std_mean4_hk.columns.values
no_features = len(_features)
radar = Radar(fig, _features, _clusters)
# Colours are picked by position, so cluster_colors needs at least as many
# entries as there are clusters.
for _pos, k in enumerate(_clusters):
    cluster_data = X_std_mean4_hk[k].values.tolist()
    radar.plot(
        cluster_data, '-',
        lw=2, color=cluster_colors[_pos], alpha=0.7, label='cluster {}'.format(k),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 4 hierarchical kmeans: Feature means per cluster", size=22, pad=60)
plt.show()
# ---------------------------------------------------------------------------
# "k=5 hk" (hierarchical kmeans) clustering: per-cluster feature means and
# radar chart.
# ---------------------------------------------------------------------------
# Cluster-assignment columns. After the transpose/concat below they show up as
# rows, and they are removed so that only feature rows remain.
_label_cols = [
    'k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
    'k=3 random', 'k=4 random', 'k=5 random',
    'k=3', 'k=4', 'k=5',
    'k=3 hk', 'k=4 hk', 'k=5 hk',
]

# Unscaled data: overall column mean ('mean') beside the mean of every
# 'k=5 hk' group.
_raw_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=5 hk').mean().T,
    ],
    axis=1,
)
_raw_overall = _raw_summary['mean']
# Relative deviation of each group mean from the overall mean, rounded to two
# decimals and then multiplied by 100.
X_dev_rel5_hk = (
    _raw_summary.sub(_raw_overall, axis=0).div(_raw_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_mean5_hk = _raw_summary.drop(columns=['mean'], index=_label_cols)

# Same two tables for the scaled data.
# NOTE(review): the ratio divides by the overall mean of the scaled frame; if
# that mean is close to 0 (e.g. z-scored features) the values blow up --
# confirm which scaler produced ProduksiBuah20_22_scaled_cluster.
_std_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=5 hk').mean().T,
    ],
    axis=1,
)
_std_overall = _std_summary['mean']
X_std_dev_rel5_hk = (
    _std_summary.sub(_std_overall, axis=0).div(_std_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_std_mean5_hk = _std_summary.drop(columns=['mean'], index=_label_cols)

# Radar chart of the scaled per-cluster means. The axes (feature rows) and the
# clusters (columns) are read from X_std_mean5_hk itself rather than from the
# separately fitted kmeans5 estimator, so the axis labels and legend entries
# always match the values that are drawn.
fig = plt.figure(figsize=(8, 8))
_features = X_std_mean5_hk.index.values
_clusters = X_std_mean5_hk.columns.values
no_features = len(_features)
radar = Radar(fig, _features, _clusters)
# Colours are picked by position, so cluster_colors needs at least as many
# entries as there are clusters.
for _pos, k in enumerate(_clusters):
    cluster_data = X_std_mean5_hk[k].values.tolist()
    radar.plot(
        cluster_data, '-',
        lw=2, color=cluster_colors[_pos], alpha=0.7, label='cluster {}'.format(k),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 5 hierarchical kmeans: Feature means per cluster", size=22, pad=60)
plt.show()
# ---------------------------------------------------------------------------
# "k=3" (Single Linkage) clustering: per-cluster feature means and radar
# chart.
# ---------------------------------------------------------------------------
# Cluster-assignment columns. After the transpose/concat below they show up as
# rows, and they are removed so that only feature rows remain.
_label_cols = [
    'k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
    'k=3 random', 'k=4 random', 'k=5 random',
    'k=3', 'k=4', 'k=5',
    'k=3 hk', 'k=4 hk', 'k=5 hk',
]

# Unscaled data: overall column mean ('mean') beside the mean of every
# 'k=3' group.
_raw_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=3').mean().T,
    ],
    axis=1,
)
_raw_overall = _raw_summary['mean']
# Relative deviation of each group mean from the overall mean, rounded to two
# decimals and then multiplied by 100.
X_dev_rel3_sl = (
    _raw_summary.sub(_raw_overall, axis=0).div(_raw_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_mean3_sl = _raw_summary.drop(columns=['mean'], index=_label_cols)

# Same two tables for the scaled data.
# NOTE(review): the ratio divides by the overall mean of the scaled frame; if
# that mean is close to 0 (e.g. z-scored features) the values blow up --
# confirm which scaler produced ProduksiBuah20_22_scaled_cluster.
_std_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=3').mean().T,
    ],
    axis=1,
)
_std_overall = _std_summary['mean']
X_std_dev_rel3_sl = (
    _std_summary.sub(_std_overall, axis=0).div(_std_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_std_mean3_sl = _std_summary.drop(columns=['mean'], index=_label_cols)

# Radar chart of the scaled per-cluster means. The axes (feature rows) and the
# clusters (columns) are read from X_std_mean3_sl itself rather than from the
# separately fitted kmeans3 estimator, so the axis labels and legend entries
# always match the values that are drawn.
fig = plt.figure(figsize=(8, 8))
_features = X_std_mean3_sl.index.values
_clusters = X_std_mean3_sl.columns.values
no_features = len(_features)
radar = Radar(fig, _features, _clusters)
# Colours are picked by position, so cluster_colors needs at least as many
# entries as there are clusters.
for _pos, k in enumerate(_clusters):
    cluster_data = X_std_mean3_sl[k].values.tolist()
    radar.plot(
        cluster_data, '-',
        lw=2, color=cluster_colors[_pos], alpha=0.7, label='cluster {}'.format(k),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 3 Single Linkage: Feature means per cluster", size=22, pad=60)
plt.show()
# ---------------------------------------------------------------------------
# "k=4" (Single Linkage) clustering: per-cluster feature means and radar
# chart.
# ---------------------------------------------------------------------------
# Cluster-assignment columns. After the transpose/concat below they show up as
# rows, and they are removed so that only feature rows remain.
_label_cols = [
    'k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
    'k=3 random', 'k=4 random', 'k=5 random',
    'k=3', 'k=4', 'k=5',
    'k=3 hk', 'k=4 hk', 'k=5 hk',
]

# Unscaled data: overall column mean ('mean') beside the mean of every
# 'k=4' group.
_raw_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=4').mean().T,
    ],
    axis=1,
)
_raw_overall = _raw_summary['mean']
# Relative deviation of each group mean from the overall mean, rounded to two
# decimals and then multiplied by 100.
X_dev_rel4_sl = (
    _raw_summary.sub(_raw_overall, axis=0).div(_raw_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_mean4_sl = _raw_summary.drop(columns=['mean'], index=_label_cols)

# Same two tables for the scaled data.
# NOTE(review): the ratio divides by the overall mean of the scaled frame; if
# that mean is close to 0 (e.g. z-scored features) the values blow up --
# confirm which scaler produced ProduksiBuah20_22_scaled_cluster.
_std_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=4').mean().T,
    ],
    axis=1,
)
_std_overall = _std_summary['mean']
X_std_dev_rel4_sl = (
    _std_summary.sub(_std_overall, axis=0).div(_std_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_std_mean4_sl = _std_summary.drop(columns=['mean'], index=_label_cols)

# Radar chart of the scaled per-cluster means. The axes (feature rows) and the
# clusters (columns) are read from X_std_mean4_sl itself rather than from the
# separately fitted kmeans4 estimator, so the axis labels and legend entries
# always match the values that are drawn.
fig = plt.figure(figsize=(8, 8))
_features = X_std_mean4_sl.index.values
_clusters = X_std_mean4_sl.columns.values
no_features = len(_features)
radar = Radar(fig, _features, _clusters)
# Colours are picked by position, so cluster_colors needs at least as many
# entries as there are clusters.
for _pos, k in enumerate(_clusters):
    cluster_data = X_std_mean4_sl[k].values.tolist()
    radar.plot(
        cluster_data, '-',
        lw=2, color=cluster_colors[_pos], alpha=0.7, label='cluster {}'.format(k),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 4 Single Linkage: Feature means per cluster", size=22, pad=60)
plt.show()
# ---------------------------------------------------------------------------
# "k=5" (Single Linkage) clustering: per-cluster feature means and radar
# chart.
# ---------------------------------------------------------------------------
# Cluster-assignment columns. After the transpose/concat below they show up as
# rows, and they are removed so that only feature rows remain.
_label_cols = [
    'k=3 kmeans++', 'k=4 kmeans++', 'k=5 kmeans++',
    'k=3 random', 'k=4 random', 'k=5 random',
    'k=3', 'k=4', 'k=5',
    'k=3 hk', 'k=4 hk', 'k=5 hk',
]

# Unscaled data: overall column mean ('mean') beside the mean of every
# 'k=5' group.
_raw_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_copy.mean(), columns=['mean']),
        ProduksiBuah20_22_copy.groupby('k=5').mean().T,
    ],
    axis=1,
)
_raw_overall = _raw_summary['mean']
# Relative deviation of each group mean from the overall mean, rounded to two
# decimals and then multiplied by 100.
X_dev_rel5_sl = (
    _raw_summary.sub(_raw_overall, axis=0).div(_raw_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_mean5_sl = _raw_summary.drop(columns=['mean'], index=_label_cols)

# Same two tables for the scaled data.
# NOTE(review): the ratio divides by the overall mean of the scaled frame; if
# that mean is close to 0 (e.g. z-scored features) the values blow up --
# confirm which scaler produced ProduksiBuah20_22_scaled_cluster.
_std_summary = pd.concat(
    [
        pd.DataFrame(ProduksiBuah20_22_scaled_cluster.mean(), columns=['mean']),
        ProduksiBuah20_22_scaled_cluster.groupby('k=5').mean().T,
    ],
    axis=1,
)
_std_overall = _std_summary['mean']
X_std_dev_rel5_sl = (
    _std_summary.sub(_std_overall, axis=0).div(_std_overall, axis=0).round(2) * 100
).drop(columns=['mean'], index=_label_cols)
X_std_mean5_sl = _std_summary.drop(columns=['mean'], index=_label_cols)

# Radar chart of the scaled per-cluster means. The axes (feature rows) and the
# clusters (columns) are read from X_std_mean5_sl itself rather than from the
# separately fitted kmeans5 estimator, so the axis labels and legend entries
# always match the values that are drawn.
fig = plt.figure(figsize=(8, 8))
_features = X_std_mean5_sl.index.values
_clusters = X_std_mean5_sl.columns.values
no_features = len(_features)
radar = Radar(fig, _features, _clusters)
# Colours are picked by position, so cluster_colors needs at least as many
# entries as there are clusters.
for _pos, k in enumerate(_clusters):
    cluster_data = X_std_mean5_sl[k].values.tolist()
    radar.plot(
        cluster_data, '-',
        lw=2, color=cluster_colors[_pos], alpha=0.7, label='cluster {}'.format(k),
    )
radar.ax.legend()
radar.ax.set_title("Cluster 5 Single Linkage: Feature means per cluster", size=22, pad=60)
plt.show()